{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import os\n",
    "import lightgbm as lgb\n",
    "from sklearn.model_selection import StratifiedKFold\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "from sklearn.externals import joblib\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Data path is assumed to be 'data' under repo root\n",
    "def csv_loader(filename, datapath = \"./data\", stats = True):\n",
    "    \"\"\"Load a CSV file from `datapath` into a pandas DataFrame.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    filename : str\n",
    "        Name of the CSV file, joined onto `datapath`.\n",
    "    datapath : str, optional\n",
    "        Directory that holds the data files (default './data').\n",
    "    stats : bool, optional\n",
    "        When True, print the number of rows that were loaded.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        The parsed contents of the file.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    Exception\n",
    "        Whatever `open` / `pd.read_csv` raised (e.g. FileNotFoundError);\n",
    "        the failure is printed and then re-raised unchanged.\n",
    "    \"\"\"\n",
    "    print('Loading %s.' % filename)\n",
    "    path = os.path.join(datapath, filename)\n",
    "    try:\n",
    "        with open(path, 'rb') as f:\n",
    "            data = pd.read_csv(f)\n",
    "    except Exception as e:\n",
    "        print('Unable to load data ', path, ':', e)\n",
    "        # Re-raise the real error: falling through would reach `len(data)` /\n",
    "        # `return data` with `data` unbound and fail with UnboundLocalError.\n",
    "        raise\n",
    "    if stats:\n",
    "        print(\"{:d} rows of data loaded.\".format(len(data)))\n",
    "    return data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def merge_feature(df_base, df_merge, primary_key = \"SK_ID_CURR\", rename = dict(), suffixes = (\"\", \"\")):\n",
    "    \"\"\"Left-join `df_merge` onto `df_base` and return the joined frame.\n",
    "\n",
    "    The columns of `df_merge` are renamed through the `rename` mapping first;\n",
    "    the join is then done on `primary_key` with how='left', and `suffixes`\n",
    "    is forwarded to `DataFrame.merge`.\n",
    "    \"\"\"\n",
    "    renamed = df_merge.rename(columns=rename)\n",
    "    return df_base.merge(renamed, how = \"left\", on = primary_key, suffixes = suffixes)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## bureau.csv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading bureau.csv.\n",
      "1716428 rows of data loaded.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SK_ID_CURR</th>\n",
       "      <th>SK_ID_BUREAU</th>\n",
       "      <th>CREDIT_ACTIVE</th>\n",
       "      <th>CREDIT_CURRENCY</th>\n",
       "      <th>DAYS_CREDIT</th>\n",
       "      <th>CREDIT_DAY_OVERDUE</th>\n",
       "      <th>DAYS_CREDIT_ENDDATE</th>\n",
       "      <th>DAYS_ENDDATE_FACT</th>\n",
       "      <th>AMT_CREDIT_MAX_OVERDUE</th>\n",
       "      <th>CNT_CREDIT_PROLONG</th>\n",
       "      <th>AMT_CREDIT_SUM</th>\n",
       "      <th>AMT_CREDIT_SUM_DEBT</th>\n",
       "      <th>AMT_CREDIT_SUM_LIMIT</th>\n",
       "      <th>AMT_CREDIT_SUM_OVERDUE</th>\n",
       "      <th>CREDIT_TYPE</th>\n",
       "      <th>DAYS_CREDIT_UPDATE</th>\n",
       "      <th>AMT_ANNUITY</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>215354</td>\n",
       "      <td>5714462</td>\n",
       "      <td>Closed</td>\n",
       "      <td>currency 1</td>\n",
       "      <td>-497</td>\n",
       "      <td>0</td>\n",
       "      <td>-153.0</td>\n",
       "      <td>-153.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>91323.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Consumer credit</td>\n",
       "      <td>-131</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>215354</td>\n",
       "      <td>5714463</td>\n",
       "      <td>Active</td>\n",
       "      <td>currency 1</td>\n",
       "      <td>-208</td>\n",
       "      <td>0</td>\n",
       "      <td>1075.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>225000.0</td>\n",
       "      <td>171342.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Credit card</td>\n",
       "      <td>-20</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>215354</td>\n",
       "      <td>5714464</td>\n",
       "      <td>Active</td>\n",
       "      <td>currency 1</td>\n",
       "      <td>-203</td>\n",
       "      <td>0</td>\n",
       "      <td>528.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>464323.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Consumer credit</td>\n",
       "      <td>-16</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   SK_ID_CURR  SK_ID_BUREAU CREDIT_ACTIVE CREDIT_CURRENCY  DAYS_CREDIT  \\\n",
       "0      215354       5714462        Closed      currency 1         -497   \n",
       "1      215354       5714463        Active      currency 1         -208   \n",
       "2      215354       5714464        Active      currency 1         -203   \n",
       "\n",
       "   CREDIT_DAY_OVERDUE  DAYS_CREDIT_ENDDATE  DAYS_ENDDATE_FACT  \\\n",
       "0                   0               -153.0             -153.0   \n",
       "1                   0               1075.0                NaN   \n",
       "2                   0                528.0                NaN   \n",
       "\n",
       "   AMT_CREDIT_MAX_OVERDUE  CNT_CREDIT_PROLONG  AMT_CREDIT_SUM  \\\n",
       "0                     NaN                   0         91323.0   \n",
       "1                     NaN                   0        225000.0   \n",
       "2                     NaN                   0        464323.5   \n",
       "\n",
       "   AMT_CREDIT_SUM_DEBT  AMT_CREDIT_SUM_LIMIT  AMT_CREDIT_SUM_OVERDUE  \\\n",
       "0                  0.0                   NaN                     0.0   \n",
       "1             171342.0                   NaN                     0.0   \n",
       "2                  NaN                   NaN                     0.0   \n",
       "\n",
       "       CREDIT_TYPE  DAYS_CREDIT_UPDATE  AMT_ANNUITY  \n",
       "0  Consumer credit                -131          NaN  \n",
       "1      Credit card                 -20          NaN  \n",
       "2  Consumer credit                 -16          NaN  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Read bureau.csv from the data directory and preview its first three rows\n",
    "df_bureau = csv_loader(\"bureau.csv\")\n",
    "df_bureau.head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SK_ID_CURR</th>\n",
       "      <th>SK_ID_BUREAU</th>\n",
       "      <th>CREDIT_ACTIVE</th>\n",
       "      <th>CREDIT_CURRENCY</th>\n",
       "      <th>DAYS_CREDIT</th>\n",
       "      <th>CREDIT_DAY_OVERDUE</th>\n",
       "      <th>DAYS_CREDIT_ENDDATE</th>\n",
       "      <th>DAYS_ENDDATE_FACT</th>\n",
       "      <th>AMT_CREDIT_MAX_OVERDUE</th>\n",
       "      <th>CNT_CREDIT_PROLONG</th>\n",
       "      <th>AMT_CREDIT_SUM</th>\n",
       "      <th>AMT_CREDIT_SUM_DEBT</th>\n",
       "      <th>AMT_CREDIT_SUM_LIMIT</th>\n",
       "      <th>AMT_CREDIT_SUM_OVERDUE</th>\n",
       "      <th>CREDIT_TYPE</th>\n",
       "      <th>DAYS_CREDIT_UPDATE</th>\n",
       "      <th>AMT_ANNUITY</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>1.716428e+06</td>\n",
       "      <td>1.716428e+06</td>\n",
       "      <td>1716428</td>\n",
       "      <td>1716428</td>\n",
       "      <td>1.716428e+06</td>\n",
       "      <td>1.716428e+06</td>\n",
       "      <td>1.610875e+06</td>\n",
       "      <td>1.082775e+06</td>\n",
       "      <td>5.919400e+05</td>\n",
       "      <td>1.716428e+06</td>\n",
       "      <td>1.716415e+06</td>\n",
       "      <td>1.458759e+06</td>\n",
       "      <td>1.124648e+06</td>\n",
       "      <td>1.716428e+06</td>\n",
       "      <td>1716428</td>\n",
       "      <td>1.716428e+06</td>\n",
       "      <td>4.896370e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>unique</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>top</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Closed</td>\n",
       "      <td>currency 1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Consumer credit</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>freq</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1079273</td>\n",
       "      <td>1715020</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1251615</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>2.782149e+05</td>\n",
       "      <td>5.924434e+06</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-1.142108e+03</td>\n",
       "      <td>8.181666e-01</td>\n",
       "      <td>5.105174e+02</td>\n",
       "      <td>-1.017437e+03</td>\n",
       "      <td>3.825418e+03</td>\n",
       "      <td>6.410406e-03</td>\n",
       "      <td>3.549946e+05</td>\n",
       "      <td>1.370851e+05</td>\n",
       "      <td>6.229515e+03</td>\n",
       "      <td>3.791276e+01</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-5.937483e+02</td>\n",
       "      <td>1.571276e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>1.029386e+05</td>\n",
       "      <td>5.322657e+05</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>7.951649e+02</td>\n",
       "      <td>3.654443e+01</td>\n",
       "      <td>4.994220e+03</td>\n",
       "      <td>7.140106e+02</td>\n",
       "      <td>2.060316e+05</td>\n",
       "      <td>9.622391e-02</td>\n",
       "      <td>1.149811e+06</td>\n",
       "      <td>6.774011e+05</td>\n",
       "      <td>4.503203e+04</td>\n",
       "      <td>5.937650e+03</td>\n",
       "      <td>NaN</td>\n",
       "      <td>7.207473e+02</td>\n",
       "      <td>3.258269e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>1.000010e+05</td>\n",
       "      <td>5.000000e+06</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-2.922000e+03</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>-4.206000e+04</td>\n",
       "      <td>-4.202300e+04</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>-4.705600e+06</td>\n",
       "      <td>-5.864061e+05</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-4.194700e+04</td>\n",
       "      <td>0.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>1.888668e+05</td>\n",
       "      <td>5.463954e+06</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-1.666000e+03</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>-1.138000e+03</td>\n",
       "      <td>-1.489000e+03</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>5.130000e+04</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-9.080000e+02</td>\n",
       "      <td>0.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>2.780550e+05</td>\n",
       "      <td>5.926304e+06</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-9.870000e+02</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>-3.300000e+02</td>\n",
       "      <td>-8.970000e+02</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.255185e+05</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-3.950000e+02</td>\n",
       "      <td>0.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>3.674260e+05</td>\n",
       "      <td>6.385681e+06</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-4.740000e+02</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>4.740000e+02</td>\n",
       "      <td>-4.250000e+02</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>3.150000e+05</td>\n",
       "      <td>4.015350e+04</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-3.300000e+01</td>\n",
       "      <td>1.350000e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>4.562550e+05</td>\n",
       "      <td>6.843457e+06</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>2.792000e+03</td>\n",
       "      <td>3.119900e+04</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.159872e+08</td>\n",
       "      <td>9.000000e+00</td>\n",
       "      <td>5.850000e+08</td>\n",
       "      <td>1.701000e+08</td>\n",
       "      <td>4.705600e+06</td>\n",
       "      <td>3.756681e+06</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.720000e+02</td>\n",
       "      <td>1.184534e+08</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          SK_ID_CURR  SK_ID_BUREAU CREDIT_ACTIVE CREDIT_CURRENCY  \\\n",
       "count   1.716428e+06  1.716428e+06       1716428         1716428   \n",
       "unique           NaN           NaN             4               4   \n",
       "top              NaN           NaN        Closed      currency 1   \n",
       "freq             NaN           NaN       1079273         1715020   \n",
       "mean    2.782149e+05  5.924434e+06           NaN             NaN   \n",
       "std     1.029386e+05  5.322657e+05           NaN             NaN   \n",
       "min     1.000010e+05  5.000000e+06           NaN             NaN   \n",
       "25%     1.888668e+05  5.463954e+06           NaN             NaN   \n",
       "50%     2.780550e+05  5.926304e+06           NaN             NaN   \n",
       "75%     3.674260e+05  6.385681e+06           NaN             NaN   \n",
       "max     4.562550e+05  6.843457e+06           NaN             NaN   \n",
       "\n",
       "         DAYS_CREDIT  CREDIT_DAY_OVERDUE  DAYS_CREDIT_ENDDATE  \\\n",
       "count   1.716428e+06        1.716428e+06         1.610875e+06   \n",
       "unique           NaN                 NaN                  NaN   \n",
       "top              NaN                 NaN                  NaN   \n",
       "freq             NaN                 NaN                  NaN   \n",
       "mean   -1.142108e+03        8.181666e-01         5.105174e+02   \n",
       "std     7.951649e+02        3.654443e+01         4.994220e+03   \n",
       "min    -2.922000e+03        0.000000e+00        -4.206000e+04   \n",
       "25%    -1.666000e+03        0.000000e+00        -1.138000e+03   \n",
       "50%    -9.870000e+02        0.000000e+00        -3.300000e+02   \n",
       "75%    -4.740000e+02        0.000000e+00         4.740000e+02   \n",
       "max     0.000000e+00        2.792000e+03         3.119900e+04   \n",
       "\n",
       "        DAYS_ENDDATE_FACT  AMT_CREDIT_MAX_OVERDUE  CNT_CREDIT_PROLONG  \\\n",
       "count        1.082775e+06            5.919400e+05        1.716428e+06   \n",
       "unique                NaN                     NaN                 NaN   \n",
       "top                   NaN                     NaN                 NaN   \n",
       "freq                  NaN                     NaN                 NaN   \n",
       "mean        -1.017437e+03            3.825418e+03        6.410406e-03   \n",
       "std          7.140106e+02            2.060316e+05        9.622391e-02   \n",
       "min         -4.202300e+04            0.000000e+00        0.000000e+00   \n",
       "25%         -1.489000e+03            0.000000e+00        0.000000e+00   \n",
       "50%         -8.970000e+02            0.000000e+00        0.000000e+00   \n",
       "75%         -4.250000e+02            0.000000e+00        0.000000e+00   \n",
       "max          0.000000e+00            1.159872e+08        9.000000e+00   \n",
       "\n",
       "        AMT_CREDIT_SUM  AMT_CREDIT_SUM_DEBT  AMT_CREDIT_SUM_LIMIT  \\\n",
       "count     1.716415e+06         1.458759e+06          1.124648e+06   \n",
       "unique             NaN                  NaN                   NaN   \n",
       "top                NaN                  NaN                   NaN   \n",
       "freq               NaN                  NaN                   NaN   \n",
       "mean      3.549946e+05         1.370851e+05          6.229515e+03   \n",
       "std       1.149811e+06         6.774011e+05          4.503203e+04   \n",
       "min       0.000000e+00        -4.705600e+06         -5.864061e+05   \n",
       "25%       5.130000e+04         0.000000e+00          0.000000e+00   \n",
       "50%       1.255185e+05         0.000000e+00          0.000000e+00   \n",
       "75%       3.150000e+05         4.015350e+04          0.000000e+00   \n",
       "max       5.850000e+08         1.701000e+08          4.705600e+06   \n",
       "\n",
       "        AMT_CREDIT_SUM_OVERDUE      CREDIT_TYPE  DAYS_CREDIT_UPDATE  \\\n",
       "count             1.716428e+06          1716428        1.716428e+06   \n",
       "unique                     NaN               15                 NaN   \n",
       "top                        NaN  Consumer credit                 NaN   \n",
       "freq                       NaN          1251615                 NaN   \n",
       "mean              3.791276e+01              NaN       -5.937483e+02   \n",
       "std               5.937650e+03              NaN        7.207473e+02   \n",
       "min               0.000000e+00              NaN       -4.194700e+04   \n",
       "25%               0.000000e+00              NaN       -9.080000e+02   \n",
       "50%               0.000000e+00              NaN       -3.950000e+02   \n",
       "75%               0.000000e+00              NaN       -3.300000e+01   \n",
       "max               3.756681e+06              NaN        3.720000e+02   \n",
       "\n",
       "         AMT_ANNUITY  \n",
       "count   4.896370e+05  \n",
       "unique           NaN  \n",
       "top              NaN  \n",
       "freq             NaN  \n",
       "mean    1.571276e+04  \n",
       "std     3.258269e+05  \n",
       "min     0.000000e+00  \n",
       "25%     0.000000e+00  \n",
       "50%     0.000000e+00  \n",
       "75%     1.350000e+04  \n",
       "max     1.184534e+08  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Summary statistics for every column; include=\"all\" also covers the non-numeric ones\n",
    "df_bureau.describe(include = \"all\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## application_{train|test}.csv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading application_train.csv.\n",
      "307511 rows of data loaded.\n",
      "Loading application_test.csv.\n",
      "48744 rows of data loaded.\n"
     ]
    }
   ],
   "source": [
    "# Load the train and test application tables\n",
    "df_train = csv_loader(\"application_train.csv\")\n",
    "df_test = csv_loader(\"application_test.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   SK_ID_CURR train_test\n",
      "0      100002      train\n",
      "       SK_ID_CURR train_test\n",
      "48743      456250       test\n"
     ]
    }
   ],
   "source": [
    "# Tag every application row with its split (this adds a column to df_train / df_test in place)\n",
    "df_train[\"train_test\"] = \"train\"\n",
    "df_test[\"train_test\"] = \"test\"\n",
    "# The base feature table holds only the ID and its train/test flag\n",
    "df_feat = pd.concat([df_train[[\"SK_ID_CURR\", \"train_test\"]], df_test[[\"SK_ID_CURR\", \"train_test\"]]])\n",
    "# concat keeps each frame's own index, so the last row still carries its df_test index label\n",
    "print(df_feat.head(1))\n",
    "print(df_feat.tail(1))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Feature Engineering"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### SK_ID_BUREAU"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SK_ID_CURR</th>\n",
       "      <th>train_test</th>\n",
       "      <th>count_bureau</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>100002</td>\n",
       "      <td>train</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>100003</td>\n",
       "      <td>train</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>100004</td>\n",
       "      <td>train</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   SK_ID_CURR train_test  count_bureau\n",
       "0      100002      train           8.0\n",
       "1      100003      train           4.0\n",
       "2      100004      train           2.0"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of bureau records per SK_ID_CURR. The column is selected before count()\n",
    "# so that only SK_ID_BUREAU is aggregated instead of every column of df_bureau.\n",
    "bureau_hist = df_bureau.groupby(\"SK_ID_CURR\")[[\"SK_ID_BUREAU\"]].count().reset_index()\n",
    "# Left-join the counts onto the base feature table under the name count_bureau\n",
    "df_feat = merge_feature(df_feat, bureau_hist, rename = {\"SK_ID_BUREAU\" : \"count_bureau\"})\n",
    "df_feat.head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SK_ID_CURR</th>\n",
       "      <th>train_test</th>\n",
       "      <th>count_bureau</th>\n",
       "      <th>no_bureau</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>100002</td>\n",
       "      <td>train</td>\n",
       "      <td>8.0</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   SK_ID_CURR train_test  count_bureau  no_bureau\n",
       "0      100002      train           8.0      False"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Flag IDs with no bureau record: count_bureau is null for them after the merge above.\n",
    "# The flag is taken from the column as a Series rather than a one-column DataFrame.\n",
    "df_feat[\"no_bureau\"] = df_feat[\"count_bureau\"].isnull()\n",
    "df_feat.head(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"4\" halign=\"left\">no_bureau</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>unique</th>\n",
       "      <th>top</th>\n",
       "      <th>freq</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>train_test</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>test</th>\n",
       "      <td>48744</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>42320</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>train</th>\n",
       "      <td>307511</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>263491</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           no_bureau                      \n",
       "               count unique    top    freq\n",
       "train_test                                \n",
       "test           48744      2  False   42320\n",
       "train         307511      2  False  263491"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Summarise the no_bureau flag separately for the train and test rows\n",
    "df_feat.groupby(\"train_test\")[[\"no_bureau\"]].describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAuMAAAE/CAYAAADsac1tAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3XucXWV97/HPjwQI4SKERIokOFFzlEsDhIBRrAIRCIKALSiKEJSatmLVVo8G9ZRipYZjucjBS6lELqJIU4VU8EAUKJUTLkEsGIInORjIGCQh4X4P/M4f6xncTPbM7CSzZ00yn/frNa/Z61nPWutZe2XBdz/zrGdHZiJJkiRp4G1WdwMkSZKkocowLkmSJNXEMC5JkiTVxDAuSZIk1cQwLkmSJNXEMC5JkiTVxDAuaYNFxMKIOLDudtQpIt4XEcsi4qmI2KfJ+oyIN9XRtk1RROwUETdHxJMRcXbd7ZGk9WUYl9SriFgaEe/uVnZyRPyiazkz98jMm/rYT0cJpMPb1NS6/RPwiczcJjPvqrsxG7Pu/756MAN4BNguMz/TD8fcIiLOjojO8oHqtxFxbsP6V90HEXF8RDwaEe/qZZ+v+jcfERdHxAvlA8STEfHriPhqRLymxTbuHBEXRcRDZfv7IuKMiNi6p/urHPMr5fXJEfFSOb8nIuK/IuLIJu19qvwsjYiZrb+LktaHYVzSJmEQhPzXAwvbfZCo+N/u6v2+N9fjm+t6+LdyGjAZ2B/YFjgIaPqhKiKmA98AjsjM/1jHw//PzNwWGAN8BJgC3BIRW/fR5lHAfGAr4G1lH4cA2wNvXIfjz8/Mbcp23wSuiIjtu9XZvtQ5FvgfEXHIOuxf0jryP+iSNlhjr2FE7B8RC0rP28MRcU6pdnP5/VjpdXtbRGwWEV+KiAciYkVEXNrYSxgRJ5V1qyLif3Q7zt9HxJyI+F5EPAGcXI49PyIeK72HF0TEFg37y4j4eEQsLj2L/xARbyzbPBERVzbW73aOTdsaEVtGxFPAMOC/IuL/9fJWvSci7o+IRyLia12hupzL9xqO1b1H9aaIODMibgGeAd5Qjt3VS/q7iPhKRAwr9d8YETeU9+2RiLi8MXBFtyEzjb2nPZz7xyJiUXnP7o2ISaV8t9K2x6IaqnRUwzY3RcSfNyy/qre7tOEvy7V4NCK+UT5o7AZ8G3hb+XfyWJP2XAxMBz5X6ry7XIfzImJ5+TkvIrYs9Q+Mqsf78xHxe+C7TU5zP+DHmbk8K0sz89Imx54BnA0clpn/p6f3rC+Z+Vxm3gEcBexIFcx787fAk8CHM3Np2ceyzPxUZt69Hsd/GbgM2BqY0EOdBVQfMPde1/1Lap1hXFJ/+zrw9czcjqrH7spS/s7ye/sylGM+cHL5OQh4A7ANcAFAROxO1XN3ArAz8Bpgl27HOhqYQ9XLdznwEvA3wGjgbcBU4OPdtpkG7EvVI/k54MJyjHHAnsAHezivpm3NzOdLLyLAXpnZWy/l+6h6XyeVtn+0l7rdnUg1NGNb4AHgEmAN8CZgH+BQoCv8BvBV4HXAbuXc/n4djvWKiDiubHsSsB1VeFwVEZsD/w5cD7wW+Gvg8oh48zrs/kiqELwX8H6qgLsI+EtKD25mdu+1JTNPprre/7PU+RnwRaprunfZ3/7Alxo2+yNgFFWP+owmbbkV+NvyYe2PIyKa1Pkr4B+AqSWobrDMfBKYB/xJH1XfDfyohOgNVj64fQR4kerfU7M6U6juiSX9cUxJzRnGJbXiqtL7+VjpqfxmL3VfBN4UEaMz86nMvLWXuicA52Tm/Zn5FNVQgeNLj/CxwL9n5i8y8wXg74DuQxLmZ+ZVmflyZj6bmXdm5q2Zuab0Hv4z0H1M71mZ+URmLgR+DVxfjv848FOqYLuubW3VWZm5OjMfBM6j5+DfzMWZuTAz11CFysOBT2fm05m5AjgXOB4gM5dk5rzy
QWElcA5rvw+t+nOq0HtH6TFekpkPUAXfbYBZmflCZt4A/GQdz2lWZj5W3o8b2bAe2BOAL2fminLOZ1B9gOnyMnB6eU+ebbL9V4Gzyn4WAL+LajhKo0OoQvs9G9DOZpZTXdPe7Ag81A/HmlLu4eeonnP4cPn30+iRiHiWaljMN4Gr+uG4knpgGJfUimMyc/uuH9bubW50CvDfgPsi4o5oeECsidfx6l65B4DhwE5l3bKuFZn5DLCq2/bLGhci4r9FxE8i4vdl6Mo/UvWSN3q44fWzTZa3obne2tqqxvY+UPa5Ptu+HtgceKjhA9I/U/VQExGvjYgryvCVJ4Dvsfb70KpxQLOhN68DlnXrqX2Atf960ZvfN7x+hp7f+1Y0uz6N7+/KzHyup40z86XM/EZmHkD1l5Yzgdll2EyXv6T6t/2dHnrO19cuwOo+6qyi+gtRT9aU35t3K9+c6gNyl1vLPbwDMJfmPfKjqa7FZ4EDm+xTUj8yjEvqV5m5ODM/SBUMzwLmRPVwWrMH7ZZTBcsuu1KFioepegHHdq2IiK2oegdfdbhuy98C7gMmlGEyX6AastEfemtrq8Z12355ef00MLJh3R812bbxXJcBzwOjGz4kbZeZe5T1Xy31J5b34cO8+n14poXjNR6r2dCb5cC4ePXDpLsCv1uHc+rJOj+USfPrs7xhueV9lr+yfAN4FNi9YdUKqqFPf0Lvfx1qWURsQzUE5T/7qPoz4H3R88O7D1GF7o5u5eNpMgyl/HXn48CJ0WQqzvLh5GyqHvTePnxL2kCGcUn9KiI+HBFjSo9p18N3LwErqYYKvKGh+g+Av4mI8SWU/CPwwzIUYw7w3oh4e3mo8gz6DtbbAk8AT0XEW6jG+PaX3traqv8eETtExDjgU8APS/mvgHdGxK5RPcB6Wm87ycyHqMZqnx0R20X1cOkb4w/T7G0LPEX1sOwuwH/vtotfAR+KiGERMY3eh7B8B/hsROxbHrB8U0S8HriNKnB/LiI2j2qe+fcCVzQc408jYmR5WPSUPt6bRg8DY6OHh2l78APgSxExJiJGUw1r+l4f27wiIj5dHvTcKiKGlyEq29JtRpXMXA4cDEyLhqkP11V54HRfqiEgj9L8odJG51CN2b+kvP9ExC4RcU5ETMzMl4B/A86MiB3LNfkg1YeJnzbbYWauorq+f9fLcWdRXeMR63J+klpnGJfU36YBC6OaYeTrwPFl5ohnqP70f0sZWjEFmE01o8PNwG+peuH+GqCM6f5rqnD3ENVMEiuoeoR78lngQ6Xuv/CHsNsfemzrOrgauJMqqF4DXASQmfOo2np3Wf+TFvZ1ErAFcC9VmJvDH4YxnEH1kOjj5Tg/6rbtp6iC82NUY6R7HBOcmf9Kdd2+T/W+XgWMKuP4j6Iau/4IVU/xSZl5X9n0XOAFqmB9CdUDl626gWoWj99HxCMtbvMVqrHed1ON6f5lKWvVs1SzpPye6nxOBf4sM+/vXjEzl1EF8mMj4qvrcAyogu2TVMNSLqW63m/PzKd72ygzVwNvp+r9vq3s4+dU17jrAcuPl/3eTXWvfIJq+sXe/npzHtUsPxN7WH8N1b+vj7VwbpLWQ6zHFK2SNOBKb/RjVENQflt3eyRJ6g/2jEsatCLivWWYw9ZUMz/cAyytt1WSJPUfw7ikwexoqofwllN9Mcnx6/ONi1K7RMQJ8Yevj2/8WedvY42Ib/ewr2+3o+2SBgeHqUiSJEk1sWdckiRJqolhXJIkSarJunyN8yZh9OjR2dHRUXczJEmStAm78847H8nMMX3VG3JhvKOjgwULFtTdDEmSJG3CImKtb79txmEqkiRJUk0M45IkSVJNDOOSJElSTYbcmHFJkqSNzYsvvkhnZyfPPfdc3U1RNyNGjGDs2LFsvvnm67W9YVySJGmQ6+zsZNttt6Wjo4OIqLs5KjKTVatW0dnZyfjx49drHw5TkSRJGuSee+45dtxxR4P4IBMR7Ljjjhv0FwvDuCRJ
0kbAID44beh1MYxLkiRJNXHMuCRJ0kamY+Y1/bq/pbOO6Nf9ravzzjuPGTNmMHLkyB7rbLPNNjz11FMD2KqB0dae8YhYGhH3RMSvImJBKRsVEfMiYnH5vUMpj4g4PyKWRMTdETGpYT/TS/3FETG9oXzfsv8lZVv/fiNJkrSROe+883jmmWfatv81a9a0bd8baiCGqRyUmXtn5uSyPBP4eWZOAH5elgEOByaUnxnAt6AK78DpwFuB/YHTuwJ8qTOjYbtp7T8dSZKkoefSSy9l4sSJ7LXXXpx44ok88MADTJ06lYkTJzJ16lQefPBBAE4++WTmzJnzynbbbLMNADfddBMHHnggxx57LG95y1s44YQTyEzOP/98li9fzkEHHcRBBx3Uaxs+85nPMGnSJKZOncrKlSsBOPDAA1mwYAEAjzzyCB0dHQBcfPHFHHfccbz3ve/l0EMPBeBrX/sa++23HxMnTuT0009/Zb/HHHMM++67L3vssQcXXnjhWm0HmDNnDieffPJ6vns9q2OYytHAgeX1JcBNwOdL+aWZmcCtEbF9ROxc6s7LzNUAETEPmBYRNwHbZeb8Un4pcAzw0wE7k3XQ7M9Jdf9JSJIkqRULFy7kzDPP5JZbbmH06NGsXr2a6dOnc9JJJzF9+nRmz57NJz/5Sa666qpe93PXXXexcOFCXve613HAAQdwyy238MlPfpJzzjmHG2+8kdGjR/e47dNPP82kSZM4++yz+fKXv8wZZ5zBBRdc0Ovx5s+fz913382oUaO4/vrrWbx4MbfffjuZyVFHHcXNN9/MO9/5TmbPns2oUaN49tln2W+//fizP/szdtxxx/V6r9ZVu3vGE7g+Iu6MiBmlbKfMfAig/H5tKd8FWNawbWcp6628s0m5JEmS+tENN9zAscce+0pYHjVqFPPnz+dDH/oQACeeeCK/+MUv+tzP/vvvz9ixY9lss83Ye++9Wbp0actt2GyzzfjABz4AwIc//OGWjnfIIYcwatQoAK6//nquv/569tlnHyZNmsR9993H4sWLATj//PPZa6+9mDJlCsuWLXulfCC0u2f8gMxcHhGvBeZFxH291G023jvXo3ztHVcfBGYA7Lrrrr23WJIkSa+SmX1O4de1fvjw4bz88suvbPfCCy+8UmfLLbd85fWwYcM2aCx3s+N1n+976623ftU5nHbaafzFX/zFq+rcdNNN/OxnP2P+/PmMHDmSAw888JX9NJ5zu779tK0945m5vPxeAfyYasz3w2X4CeX3ilK9ExjXsPlYYHkf5WOblDdrx4WZOTkzJ48ZM2ZDT0uSJGlImTp1KldeeSWrVq0CYPXq1bz97W/niiuuAODyyy/nHe94BwAdHR3ceeedAFx99dW8+OKLfe5/22235cknn+y1zssvv/zKWPTvf//7TY/XOFa9u8MOO4zZs2e/MiPL7373O1asWMHjjz/ODjvswMiRI7nvvvu49dZbX9lmp512YtGiRbz88sv8+Mc/7vM81kfbesYjYmtgs8x8srw+FPgyMBeYDswqv68um8wFPhERV1A9rPl4Zj4UEdcB/9jw0OahwGmZuToinoyIKcBtwEnA/2rX+UiSJA0WA/3c2R577MEXv/hF3vWudzFs2DD22Wcfzj//fD760Y/yta99jTFjxvDd734XgI997GMcffTR7L///kydOvVVvdM9mTFjBocffjg777wzN954Y9M6W2+9NQsXLmTfffflNa95DT/84Q8B+OxnP8v73/9+LrvsMg4++OAej3HooYeyaNEi3va2twHVw5nf+973mDZtGt/+9reZOHEib37zm5kyZcor28yaNYsjjzyScePGseeee7ZlasWonpfsfxHxBqrecKhC//cz88yI2BG4EtgVeBA4rgTrAC6gmhHlGeAjmdk1HeJHgS+UfZ2Zmd8t5ZOBi4GtqB7c/Ovs44QmT56cXU/cDiQf4JQkSetr0aJF7LbbbnU3Qz1odn0i4s6G2QR71Lae8cy8
H9irSfkqYGqT8gRO7WFfs4HZTcoXAHtucGMlSZKkGvgNnJIkSRo03vrWt/L888+/quyyyy7jj//4j2tqUXsZxiVJkjRo3HbbbXU3YUANxDdwSpIkaQO16zk/bZgNvS6GcUmSpEFuxIgRrFq1ykA+yGQmq1atYsSIEeu9D4epSJIkDXJjx46ls7OTlStX1t0UdTNixAjGjh3bd8UeGMYlSZIGuc0335zx48fX3Qy1gcNUJEmSpJoYxiVJkqSaGMYlSZKkmhjGJUmSpJoYxiVJkqSaGMYlSZKkmhjGJUmSpJoYxiVJkqSaGMYlSZKkmhjGJUmSpJoYxiVJkqSaGMYlSZKkmhjGJUmSpJoYxiVJkqSaGMYlSZKkmhjGJUmSpJoYxiVJkqSaGMYlSZKkmhjGJUmSpJoYxiVJkqSaGMYlSZKkmhjGJUmSpJoYxiVJkqSaGMYlSZKkmhjGJUmSpJoYxiVJkqSaGMYlSZKkmhjGJUmSpJoYxiVJkqSaGMYlSZKkmhjGJUmSpJoYxiVJkqSaGMYlSZKkmgyvuwFDWcfMa5qWL511xAC3RJIkSXWwZ1ySJEmqiT3jg1CzHnN7yyVJkjY99oxLkiRJNWl7GI+IYRFxV0T8pCyPj4jbImJxRPwwIrYo5VuW5SVlfUfDPk4r5b+JiMMayqeVsiURMbPd5yJJkiT1p4HoGf8UsKhh+Szg3MycADwKnFLKTwEezcw3AeeWekTE7sDxwB7ANOCbJeAPA74BHA7sDnyw1JUkSZI2Cm0N4xExFjgC+E5ZDuBgYE6pcglwTHl9dFmmrJ9a6h8NXJGZz2fmb4ElwP7lZ0lm3p+ZLwBXlLqSJEnSRqHdPePnAZ8DXi7LOwKPZeaastwJ7FJe7wIsAyjrHy/1Xynvtk1P5ZIkSdJGoW1hPCKOBFZk5p2NxU2qZh/r1rW8WVtmRMSCiFiwcuXKXlotSZIkDZx29owfABwVEUuphpAcTNVTvn1EdE2pOBZYXl53AuMAyvrXAKsby7tt01P5WjLzwsycnJmTx4wZs+FnJkmSJPWDtoXxzDwtM8dmZgfVA5g3ZOYJwI3AsaXadODq8npuWaasvyEzs5QfX2ZbGQ9MAG4H7gAmlNlZtijHmNuu85EkSZL6Wx1f+vN54IqI+ApwF3BRKb8IuCwillD1iB8PkJkLI+JK4F5gDXBqZr4EEBGfAK4DhgGzM3PhgJ6JJEmStAEGJIxn5k3ATeX1/VQzoXSv8xxwXA/bnwmc2aT8WuDafmyqJEmSNGD8Bk5JkiSpJoZxSZIkqSaGcUmSJKkmhnFJkiSpJoZxSZIkqSaGcUmSJKkmdcwzrvXQMfOatcqWzjqihpZIkiSpv9gzLkmSJNXEMC5JkiTVxDAuSZIk1cQwLkmSJNXEMC5JkiTVxDAuSZIk1cQwLkmSJNXEMC5JkiTVxDAuSZIk1cQwLkmSJNXEMC5JkiTVxDAuSZIk1cQwLkmSJNXEMC5JkiTVxDAuSZIk1cQwLkmSJNXEMC5JkiTVxDAuSZIk1cQwLkmSJNXEMC5JkiTVxDAuSZIk1WR43Q3Q+uuYec1aZUtnHVFDSyRJkrQ+7BmXJEmSamIYlyRJkmpiGJckSZJqYhiXJEmSamIYlyRJkmpiGJckSZJqYhiXJEmSamIYlyRJkmpiGJckSZJqYhiXJEmSatJSGI+IPdvdEEmSJGmoabVn/NsRcXtEfDwitm9riyRJkqQhoqUwnpnvAE4AxgELIuL7EXFIW1smSZIkbeJaHjOemYuBLwGfB94FnB8R90XEnzarHxEjSm/6f0XEwog4o5SPj4jbImJxRPwwIrYo5VuW5SVlfUfDvk4r5b+JiMMayqeVsiURMXN93gBJkiSpLq2OGZ8YEecCi4CDgfdm5m7l9bk9bPY8cHBm7gXsDUyLiCnAWcC5mTkBeBQ4pdQ/BXg0M99U9nlWOfbuwPHAHsA04JsRMSwihgHfAA4Hdgc+WOpKkiRJG4VWe8YvAH4J7JWZ
p2bmLwEyczlVb/lasvJUWdy8/CRVgJ9Tyi8Bjimvjy7LlPVTIyJK+RWZ+Xxm/hZYAuxffpZk5v2Z+QJwRakrSZIkbRRaDePvAb6fmc8CRMRmETESIDMv62mj0oP9K2AFMA/4f8BjmbmmVOkEdimvdwGWlX2uAR4Hdmws77ZNT+WSJEnSRqHVMP4zYKuG5ZGlrFeZ+VJm7g2MperJ3q1ZtfI7eli3ruVriYgZEbEgIhasXLmyr2ZLkiRJA6LVMD6iYcgJ5fXIVg+SmY8BNwFTgO0jYnhZNRZYXl53Us3WQln/GmB1Y3m3bXoqb3b8CzNzcmZOHjNmTKvNliRJktqq1TD+dERM6lqIiH2BZ3vbICLGdM1JHhFbAe+megD0RuDYUm06cHV5PbcsU9bfkJlZyo8vs62MByYAtwN3ABPK7CxbUD3kObfF85EkSZJqN7zvKgB8GvjXiOjqed4Z+EAf2+wMXFJmPdkMuDIzfxIR9wJXRMRXgLuAi0r9i4DLImIJVY/48QCZuTAirgTuBdYAp2bmSwAR8QngOmAYMDszF7Z4PpIkSVLtWgrjmXlHRLwFeDPVWO37MvPFPra5G9inSfn9VOPHu5c/BxzXw77OBM5sUn4tcG0r5yBJkiQNNq32jAPsB3SUbfaJCDLz0ra0SuutY+Y1a5UtnXVEDS2RJElSX1oK4xFxGfBG4FfAS6U4AcO4JEmStJ5a7RmfDOxeHqiUJEmS1A9anU3l18AftbMhkiRJ0lDTas/4aODeiLgdeL6rMDOPakurJEmSpCGg1TD+9+1shCRJkjQUtTq14X9ExOuBCZn5s4gYSTW3tyRJkqT11NKY8Yj4GDAH+OdStAtwVbsaJUmSJA0FrT7AeSpwAPAEQGYuBl7brkZJkiRJQ0GrYfz5zHyhayEihlPNMy5JkiRpPbUaxv8jIr4AbBURhwD/Cvx7+5olSZIkbfpaDeMzgZXAPcBfANcCX2pXoyRJkqShoNXZVF4G/qX8SJIkSeoHLYXxiPgtTcaIZ+Yb+r1FkiRJ0hDR6pf+TG54PQI4DhjV/82RJEmSho6Wxoxn5qqGn99l5nnAwW1umyRJkrRJa3WYyqSGxc2oesq3bUuLJEmSpCGi1WEqZze8XgMsBd7f762RJEmShpBWZ1M5qN0NkSRJkoaaVoep/G1v6zPznP5pjiRJkjR0rMtsKvsBc8vye4GbgWXtaJQkSZI0FLQaxkcDkzLzSYCI+HvgXzPzz9vVMEmSJGlT19LUhsCuwAsNyy8AHf3eGkmSJGkIabVn/DLg9oj4MdU3cb4PuLRtrZIkSZKGgFZnUzkzIn4K/Ekp+khm3tW+Zqk/dcy8pmn50llHDHBLJEmS1KjVYSoAI4EnMvPrQGdEjG9TmyRJkqQhoaUwHhGnA58HTitFmwPfa1ejJEmSpKGg1Z7x9wFHAU8DZOZyYNt2NUqSJEkaCloN4y9kZlI9vElEbN2+JkmSJElDQ6th/MqI+Gdg+4j4GPAz4F/a1yxJkiRp09fqbCr/FBGHAE8Abwb+LjPntbVlkiRJ0iauzzAeEcOA6zLz3YABXJIkSeonfQ5TycyXgGci4jUD0B5JkiRpyGj1GzifA+6JiHmUGVUAMvOTbWmVJEmSNAS0GsavKT+SJEmS+kmvYTwids3MBzPzkoFqkCRJkjRU9DVm/KquFxHxb21uiyRJkjSk9BXGo+H1G9rZEEmSJGmo6SuMZw+vJUmSJG2gvh7g3CsinqDqId+qvKYsZ2Zu19bWSZIkSZuwXsN4Zg4bqIZIkiRJQ02fX/ojSZIkqT3aFsYjYlxE3BgRiyJiYUR8qpSPioh5EbG4/N6hlEdEnB8RSyLi7oiY1LCv6aX+4oiY3lC+b0TcU7Y5PyJi7ZZIkiRJg1M7e8bXAJ/JzN2AKcCpEbE7MBP4eWZOAH5elgEOByaUnxnAt6AK78DpwFuB/YHTuwJ8qTOjYbtpbTwfSZIkqV+1LYxn5kOZ+cvy+klgEbALcDTQ
9SVClwDHlNdHA5dm5VZg+4jYGTgMmJeZqzPzUWAeMK2s2y4z52dmApc27EuSJEka9AZkzHhEdAD7ALcBO2XmQ1AFduC1pdouwLKGzTpLWW/lnU3KJUmSpI1CX1MbbrCI2Ab4N+DTmflEL8O6m63I9Shv1oYZVMNZ2HXXXftq8pDRMfOatcqWzjqihpZIkiQNTW3tGY+IzamC+OWZ+aNS/HAZYkL5vaKUdwLjGjYfCyzvo3xsk/K1ZOaFmTk5MyePGTNmw05KkiRJ6iftnE0lgIuARZl5TsOquUDXjCjTgasbyk8qs6pMAR4vw1iuAw6NiB3Kg5uHAteVdU9GxJRyrJMa9iVJkiQNeu0cpnIAcCJwT0T8qpR9AZgFXBkRpwAPAseVddcC7wGWAM8AHwHIzNUR8Q/AHaXelzNzdXn9V8DFwFbAT8uPJEmStFFoWxjPzF/QfFw3wNQm9RM4tYd9zQZmNylfAOy5Ac2UJEmSauM3cEqSJEk1MYxLkiRJNTGMS5IkSTUxjEuSJEk1MYxLkiRJNTGMS5IkSTUxjEuSJEk1MYxLkiRJNTGMS5IkSTUxjEuSJEk1MYxLkiRJNTGMS5IkSTUZXncDNLh0zLxmrbKls46ooSWSJEmbPnvGJUmSpJoYxiVJkqSaGMYlSZKkmhjGJUmSpJoYxiVJkqSaGMYlSZKkmhjGJUmSpJoYxiVJkqSaGMYlSZKkmhjGJUmSpJoYxiVJkqSaGMYlSZKkmhjGJUmSpJoYxiVJkqSaGMYlSZKkmhjGJUmSpJoYxiVJkqSaGMYlSZKkmgyvuwEa/DpmXrNW2dJZR9TQEkmSpE2LPeOSJElSTQzjkiRJUk0M45IkSVJNDOOSJElSTQzjkiRJUk0M45IkSVJNDOOSJElSTQzjkiRJUk0M45IkSVJNDOOSJElSTQzjkiRJUk3aFsYjYnZErIiIXzeUjYqIeRGxuPzeoZRHRJwfEUsi4u6ImNSwzfRSf3FETG8o3zci7inbnB8R0a5zkSRJktpheBv3fTFwAXBpQ9lM4OeZOSsiZpblzwOHAxPKz1uBbwFvjYhRwOnAZCCBOyNibmY+WurMAG4FrgWmAT9t4/moQcfMa5qWL511xAC3RJIkaePVtp7xzLwZWN1wPINoAAAIZ0lEQVSt+GjgkvL6EuCYhvJLs3IrsH1E7AwcBszLzNUlgM8DppV122Xm/MxMqsB/DJIkSdJGZKDHjO+UmQ8BlN+vLeW7AMsa6nWWst7KO5uUNxURMyJiQUQsWLly5QafhCRJktQfBssDnM3Ge+d6lDeVmRdm5uTMnDxmzJj1bKIkSZLUvwY6jD9chphQfq8o5Z3AuIZ6Y4HlfZSPbVIuSZIkbTQGOozPBbpmRJkOXN1QflKZVWUK8HgZxnIdcGhE7FBmXjkUuK6sezIippRZVE5q2JckSZK0UWjbbCoR8QPgQGB0RHRSzYoyC7gyIk4BHgSOK9WvBd4DLAGeAT4CkJmrI+IfgDtKvS9nZtdDoX9FNWPLVlSzqDiTiiRJkjYqbQvjmfnBHlZNbVI3gVN72M9sYHaT8gXAnhvSRkmSJKlOg+UBTkmSJGnIMYxLkiRJNWnnN3BqCGr2zZx+K6ckSVJz9oxLkiRJNTGMS5IkSTUxjEuSJEk1MYxLkiRJNTGMS5IkSTUxjEuSJEk1MYxLkiRJNTGMS5IkSTUxjEuSJEk18Rs41XZ+K6ckSVJz9oxLkiRJNTGMS5IkSTUxjEuSJEk1MYxLkiRJNTGMS5IkSTUxjEuSJEk1cWpD1cLpDiVJkuwZlyRJkmpjGJckSZJqYhiXJEmSamIYlyRJkmpiGJckSZJq4mwqGjSazbACzrIiSZI2XfaMS5IkSTUxjEuSJEk1MYxLkiRJNTGMS5IkSTXxAU4Nes0e7PShTkmStCmwZ1ySJEmqiWFckiRJqolhXJIkSaqJY8a1UXIcuSRJ2hTYMy5JkiTVxJ5xbTLsLZckSRsbe8YlSZKkmtgzrk1a
s95ysMdckiQNDvaMS5IkSTWxZ1xDkuPLJUnSYGAYlwoDuiRJGmgbfRiPiGnA14FhwHcyc1bNTdImxIAuSZLaaaMO4xExDPgGcAjQCdwREXMz8956W6ZNmQ+FSpKk/rJRh3Fgf2BJZt4PEBFXAEcDhnENuJ5CeneGdkmS1GVjD+O7AMsaljuBt9bUFqklrYb2DbUuod/hOJIk1WNjD+PRpCzXqhQxA5hRFp+KiN+0tVXNjQYeqeG4at0mdY3irHq3b5NN6hptorxGg5/XaPDzGg1+rVyj17eyo409jHcC4xqWxwLLu1fKzAuBCweqUc1ExILMnFxnG9Q7r9Hg5zUa/LxGg5/XaPDzGg1+/XmNNvYv/bkDmBAR4yNiC+B4YG7NbZIkSZJaslH3jGfmmoj4BHAd1dSGszNzYc3NkiRJklqyUYdxgMy8Fri27na0oNZhMmqJ12jw8xoNfl6jwc9rNPh5jQa/frtGkbnW846SJEmSBsDGPmZckiRJ2mgZxgdAREyLiN9ExJKImFl3e4a6iBgXETdGxKKIWBgRnyrloyJiXkQsLr93qLutQ11EDIuIuyLiJ2V5fETcVq7RD8uD26pJRGwfEXMi4r5yP73N+2hwiYi/Kf+d+3VE/CAiRngf1SsiZkfEioj4dUNZ0/smKueX/HB3REyqr+VDRw/X6Gvlv3V3R8SPI2L7hnWnlWv0m4g4bF2PZxhvs4gYBnwDOBzYHfhgROxeb6uGvDXAZzJzN2AKcGq5JjOBn2fmBODnZVn1+hSwqGH5LODcco0eBU6ppVXq8nXgf2fmW4C9qK6V99EgERG7AJ8EJmfmnlQTHRyP91HdLgamdSvr6b45HJhQfmYA3xqgNg51F7P2NZoH7JmZE4H/C5wGUPLD8cAeZZtvluzXMsN4++0PLMnM+zPzBeAK4Oia2zSkZeZDmfnL8vpJqgCxC9V1uaRUuwQ4pp4WCiAixgJHAN8pywEcDMwpVbxGNYqI7YB3AhcBZOYLmfkY3keDzXBgq4gYDowEHsL7qFaZeTOwultxT/fN0cClWbkV2D4idh6Ylg5dza5RZl6fmWvK4q1U320D1TW6IjOfz8zfAkuosl/LDOPttwuwrGG5s5RpEIiIDmAf4DZgp8x8CKrADry2vpYJOA/4HPByWd4ReKzhP4beS/V6A7AS+G4ZSvSdiNga76NBIzN/B/wT8CBVCH8cuBPvo8Gop/vGDDE4fRT4aXm9wdfIMN5+0aTMKWwGgYjYBvg34NOZ+UTd7dEfRMSRwIrMvLOxuElV76X6DAcmAd/KzH2Ap3FIyqBSxh0fDYwHXgdsTTXsoTvvo8HL/+4NMhHxRarhrpd3FTWptk7XyDDefp3AuIblscDymtqiIiI2pwril2fmj0rxw11//iu/V9TVPnEAcFRELKUa2nUwVU/59uXP7eC9VLdOoDMzbyvLc6jCuffR4PFu4LeZuTIzXwR+BLwd76PBqKf7xgwxiETEdOBI4IT8w9zgG3yNDOPtdwcwoTy9vgXVIP+5NbdpSCtjjy8CFmXmOQ2r5gLTy+vpwNUD3TZVMvO0zBybmR1U98wNmXkCcCNwbKnmNapRZv4eWBYRby5FU4F78T4aTB4EpkTEyPLfva5r5H00+PR038wFTiqzqkwBHu8azqKBFRHTgM8DR2XmMw2r5gLHR8SWETGe6mHb29dp337pT/tFxHuoevWGAbMz88yamzSkRcQ7gP8E7uEP45G/QDVu/EpgV6r/iR2Xmd0fstEAi4gDgc9m5pER8QaqnvJRwF3AhzPz+TrbN5RFxN5UD9huAdwPfISqk8f7aJCIiDOAD1D9Wf0u4M+pxrN6H9UkIn4AHAiMBh4GTgeuosl9Uz5EXUA1S8czwEcyc0Ed7R5KerhGpwFbAqtKtVsz8y9L/S9SjSNfQzX09afd99nr8QzjkiRJUj0cpiJJkiTVxDAuSZIk1cQwLkmSJNXEMC5JkiTVxDAuSZIk1cQwLkmSJNXEMC5J
kiTVxDAuSZIk1eT/Axi49Ue2VsOjAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 864x360 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Replace missing bureau counts with 0 so every row contributes to the histogram.\n",
    "df_feat[\"count_bureau\"] = df_feat[\"count_bureau\"].fillna(0)\n",
    "\n",
    "# Use as many bins as the largest observed count.\n",
    "n_bins = int(df_feat[\"count_bureau\"].max())\n",
    "df_feat[[\"count_bureau\"]].plot.hist(\n",
    "    bins = n_bins,\n",
    "    title = \"Histogram of bureau count for SK_ID_CURR\",\n",
    "    figsize = (12, 5),\n",
    ")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SK_ID_CURR</th>\n",
       "      <th>train_test</th>\n",
       "      <th>count_bureau</th>\n",
       "      <th>no_bureau</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>425815</td>\n",
       "      <td>train</td>\n",
       "      <td>0.0</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>187540</td>\n",
       "      <td>train</td>\n",
       "      <td>0.0</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>306845</td>\n",
       "      <td>test</td>\n",
       "      <td>0.0</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>349804</td>\n",
       "      <td>train</td>\n",
       "      <td>0.0</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>306876</td>\n",
       "      <td>test</td>\n",
       "      <td>0.0</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>306896</td>\n",
       "      <td>test</td>\n",
       "      <td>0.0</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>187520</td>\n",
       "      <td>train</td>\n",
       "      <td>0.0</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>187544</td>\n",
       "      <td>train</td>\n",
       "      <td>0.0</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>349807</td>\n",
       "      <td>train</td>\n",
       "      <td>0.0</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>445679</td>\n",
       "      <td>train</td>\n",
       "      <td>0.0</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   SK_ID_CURR train_test  count_bureau  no_bureau\n",
       "0      425815      train           0.0       True\n",
       "1      187540      train           0.0       True\n",
       "2      306845       test           0.0       True\n",
       "3      349804      train           0.0       True\n",
       "4      306876       test           0.0       True\n",
       "5      306896       test           0.0       True\n",
       "6      187520      train           0.0       True\n",
       "7      187544      train           0.0       True\n",
       "8      349807      train           0.0       True\n",
       "9      445679      train           0.0       True"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Order the rows by bureau count (ascending, fresh 0..n-1 index) so that a\n",
    "# percentile of the count can be looked up by row position.\n",
    "sorted_bureau_hist = (\n",
    "    df_feat\n",
    "    .sort_values(by = \"count_bureau\")\n",
    "    .reset_index(drop = True)\n",
    ")\n",
    "sorted_bureau_hist.head(n = 10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "90% ppl has bureau less than 11.000000\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SK_ID_CURR</th>\n",
       "      <th>train_test</th>\n",
       "      <th>count_bureau</th>\n",
       "      <th>no_bureau</th>\n",
       "      <th>large_bureau_count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>356252</th>\n",
       "      <td>318065</td>\n",
       "      <td>test</td>\n",
       "      <td>78.0</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>356253</th>\n",
       "      <td>169704</td>\n",
       "      <td>train</td>\n",
       "      <td>94.0</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>356254</th>\n",
       "      <td>120860</td>\n",
       "      <td>train</td>\n",
       "      <td>116.0</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        SK_ID_CURR train_test  count_bureau  no_bureau  large_bureau_count\n",
       "356252      318065       test          78.0      False                True\n",
       "356253      169704      train          94.0      False                True\n",
       "356254      120860      train         116.0      False                True"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Row position 90% of the way through the ascending-sorted frame; the count\n",
    "# found there is used as the cut-off for a \"large\" bureau count.\n",
    "pct90_position = int(0.9 * len(sorted_bureau_hist))\n",
    "count_bureau_90 = sorted_bureau_hist.iloc[pct90_position][\"count_bureau\"]\n",
    "print(\"90% ppl has bureau less than {:f}\".format(count_bureau_90))\n",
    "\n",
    "# Flag the rows whose count is strictly above the cut-off.\n",
    "sorted_bureau_hist[\"large_bureau_count\"] = sorted_bureau_hist[\"count_bureau\"] > count_bureau_90\n",
    "sorted_bureau_hist.tail(n = 3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SK_ID_CURR</th>\n",
       "      <th>train_test</th>\n",
       "      <th>count_bureau</th>\n",
       "      <th>no_bureau</th>\n",
       "      <th>large_bureau_count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>100002</td>\n",
       "      <td>train</td>\n",
       "      <td>8.0</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   SK_ID_CURR train_test  count_bureau  no_bureau  large_bureau_count\n",
       "0      100002      train           8.0      False               False"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Flag applicants whose bureau count is strictly above the 90% cut-off.\n",
    "# The flag is computed straight from df_feat's own count_bureau column rather than\n",
    "# merged back from the sorted copy on SK_ID_CURR: the comparison is the same, but\n",
    "# re-running this cell just overwrites the column instead of merging in a second,\n",
    "# clashing `large_bureau_count` (merge_feature uses empty suffixes), and no key\n",
    "# join is needed.\n",
    "df_feat[\"large_bureau_count\"] = df_feat[\"count_bureau\"] > count_bureau_90\n",
    "df_feat.head(1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### DAYS_CREDIT"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>SK_ID_CURR</th>\n",
       "      <th colspan=\"4\" halign=\"left\">DAYS_CREDIT</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>mean</th>\n",
       "      <th>max</th>\n",
       "      <th>min</th>\n",
       "      <th>std</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>100001</td>\n",
       "      <td>-735.000000</td>\n",
       "      <td>-49</td>\n",
       "      <td>-1572</td>\n",
       "      <td>489.942514</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>100002</td>\n",
       "      <td>-874.000000</td>\n",
       "      <td>-103</td>\n",
       "      <td>-1437</td>\n",
       "      <td>431.451040</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>100003</td>\n",
       "      <td>-1400.750000</td>\n",
       "      <td>-606</td>\n",
       "      <td>-2586</td>\n",
       "      <td>909.826128</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>100004</td>\n",
       "      <td>-867.000000</td>\n",
       "      <td>-408</td>\n",
       "      <td>-1326</td>\n",
       "      <td>649.124025</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>100005</td>\n",
       "      <td>-190.666667</td>\n",
       "      <td>-62</td>\n",
       "      <td>-373</td>\n",
       "      <td>162.297053</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  SK_ID_CURR  DAYS_CREDIT                       \n",
       "                     mean  max   min         std\n",
       "0     100001  -735.000000  -49 -1572  489.942514\n",
       "1     100002  -874.000000 -103 -1437  431.451040\n",
       "2     100003 -1400.750000 -606 -2586  909.826128\n",
       "3     100004  -867.000000 -408 -1326  649.124025\n",
       "4     100005  -190.666667  -62  -373  162.297053"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Summary statistics of DAYS_CREDIT per SK_ID_CURR across the bureau rows.\n",
    "# Selecting with [[col]] keeps two-level column labels: (col, statistic).\n",
    "col = \"DAYS_CREDIT\"\n",
    "stat_names = [\"mean\", \"max\", \"min\", \"std\"]\n",
    "creditdays_hist = (\n",
    "    df_bureau\n",
    "    .groupby(\"SK_ID_CURR\")[[col]]\n",
    "    .agg(stat_names)\n",
    "    .reset_index()\n",
    ")\n",
    "creditdays_hist.head(n = 5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\pandas\\core\\generic.py:3108: PerformanceWarning: dropping on a non-lexsorted multi-index without a level parameter may impact performance.\n",
      "  obj = obj._drop_axis(labels, axis, level=level, errors=errors)\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAA4UAAAE/CAYAAADxHmaaAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzs3Xt8VNW9///XJ1zTKnIREIE2wQsEDCZchCBQESXQYqEUCliRi1Z+bUBJjYLKEVraWo+WlBjs92jllAolegClR3sAq1BBPSCRyOWEVoQpUCAgEsVCuMT1+2N2ppOQy5Bkcpv38/HIIzNrr733Z0/mD96stdc25xwiIiIiIiISmaJquwARERERERGpPQqFIiIiIiIiEUyhUEREREREJIIpFIqIiIiIiEQwhUIREREREZEIplAoIiIiIiISwRQKRUQiiJntNrNbaruO2mRm3zGzg2b2hZkl1nY99Z2ZzTezZd7rr3mfa6MqHM9nZrdVX4UiIlIRhUIRkQaitH9Mm9kUM9tc9N4518M5t7GC48SYmTOzxmEqtbY9Dcxwzl3mnNte28U0JM65A97nWghgZhvN7N7arktERMqnUCgiIjWqDoTNrwO7a7OAOvAZlKqu1iUiIuGlUCgiEkGCRxPN7CYz22Zmn5tZnpkt9Lq97f3O96YCJplZlJnNNbO/m9kxM/u9mV0RdNy7vW0nzOzfSpxnvpmtNLNlZvY5MMU793tmlm9mR8ws08yaBh3PmdmPzOwjMztlZgvM7Bpvn8/N7OXg/iWusdRazayZmX0BNAI+NLOPy9h/kTe99HMzyzazQV771WZ2xsxaB/VNNLNPzKyJ936ameWa2UkzW2dmXy9xTSlm9hHwUXnn8rZFm9lS71i5ZvawmR0K2n61ma0ys+Nmtt/M7i/n7x5tZr/yPpPPzGyz11Y0KnyPmR0A3vL69zezd72/z4fBU47NLNbM/uL9Xd4ArgzaFhhlNrOfA4OATO97lFlGbZOCvjuPldhW5vfEzBab2a9K9P9vM5vlvZ5tZv/w6vyrmQ0t6/MREYl0CoUiIpFrEbDIOdcCuAZ42Wsf7P1u6U0FfA+Y4v0MAboAlwGZAGbWHXgW+D7QAbgC6FjiXKOAlUBLYDlQCKTiDxRJwFDgRyX2GQ70BvoDDwPPeefoDNwATCzjukqt1Tl31jl3mdfnRufcNWXs/z6QALQG/gD8l5k1d84dBt4DvhvU905gpXPuvJmNBh4FxgBtgU3AihLHHg30A7qXdy5v2zwgxruG24G7ig5iZlHAfwMf4v+shwKzzCy5jGt6Gv9nOcA718PAl0HbvwHEAclm1hF4HfiZ1zcNWGVmbb2+fwCy8f/tFgCTSzuhc+4x7zMomqo7o2Qf77vzG2AScDXQBugU1KW878lSYKL3WWBmV3rbV5hZV2AG0Nc5dzmQDPjK+GxERCKeQqGISMPyqjeqkm9m+fjDWlnOA9ea2ZXOuS+cc/9bTt/vAwudc/ucc18AjwATzD/dcCzw3865zc65c8DjgCux/3vOuVedc186584457Kdc//rnLvgnPMB/4E/mAR70jn3uXNuN7ALWO+d/zPgf4CyFokpr9YKOeeWOedOeLX9CmgGdPU2/wEvjJqZARO8NoDpwBPOuVzn3AXgF0BC8Giht/1T59yZEM71PeAXzrmTzrlDQEbQcfoCbZ1zP3XOnXPO7QOe9+opxgtN04AHnHP/cM4VOufedc6dDeo23zn3T6+uu4A/Oef+5P293gC2Ad80s6955/43L2S/jT+cVtZY4DXn3NtePf9GUFgt73vinNsKfIY/COJd+0bnXB7+MNkM6G5mTZxzPudcqSPDIiKiUCgi0tCMds61LPrh4tG3YPcA1wN7zOx9MxtZTt+rgb8Hvf870Bho7207WLTBOXcaOFFi/4PBb8zsejN7zcyOmn9K6S8ImoboyQt6faaU95dRuvJq
rZCZPehN1/zMC9ZXBNW2Ekgys6vxj6g6/KNh4L9XcVFQIP8UMIqPmpb8HMo719Ul+ge//jpwdYn/AHi0jGu8EmgOlBeKSh57XIljD8Q/Cnw1cNI598+g/sGf9aUq+d35J0HfnRC+J0v51wjqXcCL3nH2ArOA+cAxM8vy/mYiIlIKhUIRkQjlnPvIOTcRaAc8Caw0s69y8SgfwGH8YaHI14AL+IPaEYKm/JlZNP5pgMVOV+L9b4A9wHXe9NVH8Qeo6lBereXy7umbjX+UrpUXrD8rqs05lw+s97bfCaxwzhVd20FgenAod85FO+feDTqFC/VclPhc8U+bLXIQ2F/iXJc7575ZymV9AhTgnyJcluC/z0HgxRLH/qpz7pdeTa2870mRr4V43NIcIei6zOwrFP/uVPQ9WQaMMrMb8U9/fTVwYuf+4JwbiP+74PB/x0VEpBQKhSIiEcrM7jKzts65L4F8r7kQOI5/Cl+XoO4rgFRvkZHL8I/YvORNk1wJ3GFmA7xFQH5CxQHvcuBz4Asz6wb8sNourPxaK3I5/gB5HGhsZo8DLUr0+QNwN/57C/8Q1P7/gEfMrAeA+Re3GVeFc73sHa+Vd59f8D15W4HPvcVUos2skZndYGZ9S57E+/suARaaf3GaRuZfPKhZGXUtw//3TPb6NjezW8ysk3Pu7/inkv7EzJqa2UDgjnKuMY/i36OSVgIjzWyg9935KcX/bVLu98SbVvs+/hHCVUXTcs2sq5nd6l1jAf6R5cJy6hARiWgKhSIikWs4sNv8K3IuAiY45wq86Z8/B97xpg/2xx8qXsS/Mul+/P/Qngng3fM3E8jCP/JzCjgGnKVsafhH2k7hvxfupWq8rjJrDcE6/Pcr/g3/tMgCSkz5BP4IXAfkOec+LGp0zr2CfzQqy5vquAsYUYVz/RQ45F3Dn/EHqLPeuQrxh7EEb/snwG/xTz8tTRqwE3+A+tSrs9R/AzjnDuJfGOhR/IH1IPBQUP878S+W8yn+xXB+X841LgLGmn8F1YySG73vTgr+cH0EOOldc3DdFX1PlgLxeFNHPc2AX+L/XI7iHw1/tJw6RUQimv1r1ouIiEjVeaNz+fin/O2v7XoaCjP7If7gXnJBnohmZoPxj27GeKOiIiJyiTRSKCIiVWZmd5jZV7x7zZ7GPyrlq92q6jcz62BmN5v/uYtdgQeBV2q7rrrE/M+HfAD4rQKhiEjlKRSKiEh1GIV/gZfD+KdWTnCailJVTfE/guEU/ofKr6H8R4xEFDOLwz8i3QH4dS2XIyJSr2n6qIiIiIiISATTSKGIiIiIiEgEUygUERERERGJYI1ru4BwuPLKK11MTExtlyEiIiIiIlIrsrOzP3HOtQ2lb4MMhTExMWzbtq22yxAREREREakVZvb3UPtq+qiIiIiIiEgEUygUERERERGJYAqFIiIiIiIiEaxB3lMoIiIiIlIfnD9/nkOHDlFQUFDbpUg91bx5czp16kSTJk0qfQyFQhERERGRWnLo0CEuv/xyYmJiMLPaLkfqGeccJ06c4NChQ8TGxlb6OJo+KiIiIiJSSwoKCmjTpo0CoVSKmdGmTZsqjzQrFIqIiIiI1CIFQqmK6vj+KBSKiIiIiMglMzMefPDBwPunn36a+fPnh/WcMTExfPe73w28X7lyJVOmTAnrOSOBQqGIiIiIiFyyZs2asXr1aj755JMaPe+2bdvYvXt3jZ6zoVMoFBERERGRS9a4cWPuu+8+0tPTL9r297//naFDh9KzZ0+GDh3KgQMHAJgyZQr3338/AwYMoEuXLqxcuTKwz1NPPUXfvn3p2bMn8+bNK/O8aWlp/OIXv7io/dNPP2X06NH07NmT/v37s2PHDgDmz5/PtGnTuOWWW+jSpQsZGRmBfZYtW8ZNN91EQkIC06dPp7CwsNKfR32mUCgiIiIiIpWSkpLC8uXL+eyzz4q1z5gxg7vvvpsdO3bw/e9/n/vvvz+w7ciRI2zevJnXXnuNOXPm
ALB+/Xo++ugjtm7dSk5ODtnZ2bz99tulnvN73/seH3zwAXv37i3WPm/ePBITE9mxYwe/+MUvuPvuuwPb9uzZw7p169i6dSs/+clPOH/+PLm5ubz00ku888475OTk0KhRI5YvX15dH029olAoIiIiYZXbLa62SxCRMGnRogV33313sdE3gPfee48777wTgEmTJrF58+bAttGjRxMVFUX37t3Jy8sD/KFw/fr1JCYm0qtXL/bs2cNHH31U6jkbNWrEQw89xBNPPFGsffPmzUyaNAmAW2+9lRMnTgTC6re+9S2aNWvGlVdeSbt27cjLy+PNN98kOzubvn37kpCQwJtvvsm+ffuq54OpZ/ScQhERERERqbRZs2bRq1cvpk6dWmaf4BUymzVrFnjtnAv8fuSRR5g+fXpI55w0aRJPPPEEPXr0uOhYpZ03+JyNGjXiwoULOOeYPHnyReEyEmmkUEREREREKq1169Z873vf44UXXgi0DRgwgKysLACWL1/OwIEDyz1GcnIyS5Ys4YsvvgDgH//4B8eOHQNg6NCh/OMf/yjWv0mTJqSmpvLrX/860DZ48ODA9M+NGzdy5ZVX0qJFizLPOXToUFauXBk4z6effsrf//73UC+7QVEoFBERERGRKnnwwQeLrUKakZHBf/7nf9KzZ09efPFFFi1aVO7+w4YN48477yQpKYn4+HjGjh3LqVOn+PLLL9m7dy+tW7e+aJ977rmHCxcuBN7Pnz+fbdu20bNnT+bMmcPSpUvLPWf37t352c9+xrBhw+jZsye33347R44cucQrbxistGHW+q5Pnz5u27ZttV2GiIiI4L+nMG5Pbm2XIVIn5ebmEhen+27LsmvXLpYsWcLChQtru5Q6rbTvkZllO+f6hLK/RgpFRERERKROuuGGGxQIa4BCoYiIiIiISARTKBQREREREYlgCoUiIiIiIiIRTKFQREREREQkgikUioiIiIiIRDCFQhERERERkQjWOFwHNrPmwNtAM+88K51z88zsd8A3gM+8rlOcczlmZsAi4JvAaa/9A+9Yk4G5Xv+fOefKfxKliIiIiEg9FDPn9Wo9nu+X36qwz5kzZxg+fDhvvfUWBw8eJC4ujm7dulFQUMDll19OSkoKkydPLrbPqFGjOHbsGO+99x4A69evZ968ebz77ruYGYWFhfTu3Ztnn32WNm3aMH36dPLz8zl79iyDBg3iueeeK7OerVu3kpaWRl5eHmbGwIEDycjI4OWXX+ahhx6iY8eOFBQUMH36dFJTUwH/g+uff/552rZtGzjOxo0bycnJYdSoUXTp0oXTp0/Tvn17Hn74YUaOHBnY77LLLmP//v288847nDt3jv3799O1a1cA5s6dy9ixYy/tQw+T48ePM2nSJNauXVvtxw5bKATOArc6574wsybAZjP7H2/bQ865lSX6jwCu8376Ab8B+plZa2Ae0AdwQLaZ/dE5dzKMtYuIiIiIRIQlS5YwZswYGjVqBMA111zD9u3bAdi3bx9jxozhyy+/ZOrUqQDk5+fzwQcfBMJUbGwsw4YNY8mSJbzwwgvce++9PPPMM/Tt25cBAwaQnJxMamoqo0aNAmDnzp1l1pKXl8e4cePIysoiKSkJ5xyrVq3i1KlTAIwfP57MzExOnDhB165dGTt2LJ07dwYgNTWVtLS0i445aNAgXnvtNQBycnIYPXo00dHRDB06NNBn8eLFAPh8PkaOHElOTk6VPtNwaNu2LR06dOCdd97h5ptvrtZjh236qPP7wnvbxPtx5ewyCvi9t9//Ai3NrAOQDLzhnPvUC4JvAMPDVbeIiIiISCRZvnx5ILCV1KVLFxYuXEhGRkagbdWqVdxxxx1MmDCBrKysQHt6ejpPPPEEu3fvJjMzkyeffBKAI0eO0KlTp0C/+Pj4MmtZvHgxkydPJikpCQAzY+zYsbRv375YvzZt2nDttddy5MiRS7rWhIQEHn/8cTIzMy9pv9LMnz+fyZMnM2zYMGJiYli9
ejUPP/ww8fHxDB8+nPPnzwOQnZ3NN77xDXr37k1ycnKg5ueff56+ffty44038t3vfpfTp08DMGXKFO6//34GDBhAly5dWLnyX2Npo0ePZvny5VWuvaSw3lNoZo3MLAc4hj/YbfE2/dzMdphZupk189o6AgeDdj/ktZXVLiIiIiIiVXDu3Dn27dtHTExMmX169erFnj17Au9XrFjBxIkTmThxIitWrAi0d+jQgVmzZpGUlMTcuXNp3bo14B/Bu/XWWxkxYgTp6enk5+eXea5du3bRu3fvCus+cOAABQUF9OzZM9CWnp5OQkICCQkJDBkyJOTrqYqPP/6Y119/nTVr1nDXXXcxZMgQdu7cSXR0NK+//jrnz59n5syZrFy5kuzsbKZNm8Zjjz0GwJgxY3j//ff58MMPiYuL44UXXggc98iRI2zevJnXXnuNOXPmBNr79OnDpk2bqqX2YGENhc65QudcAtAJuMnMbgAeAboBfYHWwGyvu5V2iHLaizGz+8xsm5ltO378eLXULyIiIiLSkH3yySe0bNmy3D7O/euf3nl5eezdu5eBAwdy/fXX07hxY3bt2hXYnpKSQmFhIVOmTAm0TZ06ldzcXMaNG8fGjRvp378/Z8+erVS9L730Ej169KBLly488MADNG/ePLAtNTWVnJwccnJy2LBhQ0jXU1UjRoygSZMmxMfHU1hYyPDh/gmN8fHx+Hw+/vrXv7Jr1y5uv/12EhIS+NnPfsahQ4cAfwAeNGgQ8fHxLF++nN27dweOO3r0aKKioujevTt5eXmB9nbt2nH48OFqq79Ijaw+6pzLBzYCw51zR7wpomeB/wRu8rodAjoH7dYJOFxOe8lzPOec6+Oc6xN8g6mIiIiIiJQuOjqagoKCcvts376duLg4wB/KTp48SWxsLDExMfh8vmJTSKOiovCvH1nc1VdfzbRp01izZs1FQTJYjx49yM7OLrOW8ePHs3v3bjZt2sSDDz7I0aNHQ7nMMq+nqpo18096jIqKokmTJoFrj4qK4sKFCzjn6NGjRyCs7ty5k/Xr1wP+aaKZmZns3LmTefPmFfs7FB0XiofYgoICoqOjq6X2YGELhWbW1sxaeq+jgduAPd59gnirjY4Gir4RfwTuNr/+wGfOuSPAOmCYmbUys1bAMK9NRERERESqoFWrVhQWFpYZDH0+H2lpacycORPwTx1du3YtPp8Pn89HdnZ2sVBYmrVr1wburzt69CgnTpygY8fS7wabMWMGS5cuZcuWLYG2ZcuWXRT+kpKSmDRpEosWLQr5WgF27NjBggULSElJCXmfzMzMSt+D2LVrV44fPx5YpfX8+fOBEcFTp07RoUMHzp8/H/J9gn/729+44YYbKlVLecK5+mgHYKmZNcIfPl92zr1mZm+ZWVv800JzgP/P6/8n/I+j2Iv/kRRTAZxzn5rZAuB9r99PnXOfhrFuEREREZFaEcojJKrbsGHD2Lx5M7fddhvgv08uMTEx8EiKmTNnMnXqVHw+HwcOHKB///6BfWNjY2nRogVbtmyhX79+pR5//fr1xaZ6PvXUU1x11VWl9m3fvj1ZWVmkpaVx7NgxoqKiGDx4MGPGjLmo7+zZs+nVqxePPvoo4L+ncNmyZYHtr776KgCbNm0iMTGR06dP065dOzIyMoqtPFqRPXv2VHq1z6ZNm7Jy5Uruv/9+PvvsMy5cuMCsWbPo0aMHCxYsoF+/fnz9618nPj4+sMJqeTZs2MC3vlX93xGrzjm1dUWfPn3ctm3barsMERERAXK7xRG3J7e2yxCpk3Jzc6ttKmNlbd++nYULF/Liiy/Wah111ciRI1m9ejVNmzat7VIYPHgwa9asoVWrVsXaS/semVm2c65PKMcN50ihiIiIiIjUcYmJiQwZMoTCwsLAswrlX4qecVjbjh8/zo9//OOLAmF1UCgUEREREYlw06ZNq9HzrVu3jtmzZxdri42N5ZVXXqnROuqTtm3bMnr06LAcW6FQRERERERq
VHJyMsnJybVdhnhq5JEUIiIiIiIiUjcpFIqIiIiIiEQwhUIREREREZEIplAoIiIiIiISwbTQjIiIiIhIXTH/imo+3mcVdjlz5gzDhw/nrbfe4uDBg8TFxdGtW7fAw+tTUlKYPHlysX1GjRrFsWPHeO+99wD/A+rnzZvHu+++i5lRWFhI7969efbZZ2nTpg3Tp08nPz+fs2fPMmjQIJ577rky69m6dStpaWnk5eVhZgwcOJCMjAxefvllHnroITp27EhBQQHTp08nNTXVf5nz5/P888/Ttm3bwHE2btxITk4Oo0aNokuXLpw+fZr27dvz8MMPM3LkyMB+l112Gfv37+edd97h3Llz7N+/n65duwIwd+5cxo4de2mfeRVNmDCBBQsWcN1119XYORUKRUREREQi2JIlSxgzZkzgGYXXXHMN27dvB2Dfvn2MGTOGL7/8kqlTpwKQn5/PBx98EAhTsbGxDBs2jCVLlvDCCy9w77338swzz9C3b18GDBhAcnIyqampjBo1CoCdO3eWWUteXh7jxo0jKyuLpKQknHOsWrWKU6dOATB+/HgyMzM5ceIEXbt2ZezYsXTu3BmA1NRU0tLSLjrmoEGDAs8azMnJYfTo0URHRzN06NBAn8WLFwPg8/kYOXIkOTk5VfpMq+KHP/wh//7v/87zzz9fY+fU9FERERERkQi2fPnyQGArqUuXLixcuJCMjIxA26pVq7jjjjuYMGECWVlZgfb09HSeeOIJdu/eTWZmJk8++SQAR44coVOnToF+8fHxZdayePFiJk+eTFJSEgBmxtixY2nfvn2xfm3atOHaa6/lyJEjl3StCQkJPP7442RmZl7SfqWZP38+kydPZtiwYcTExLB69Woefvhh4uPjGT58OOfPnwfgpz/9KX379uWGG27gvvvuwznHhQsX6Nu3Lxs3bgTgkUce4bHHHgP8IfbPf/4zFy5cqHKNoVIoFBERERGJUOfOnWPfvn3ExMSU2adXr17s2bMn8H7FihVMnDiRiRMnsmLFikB7hw4dmDVrFklJScydO5fWrVsD/hG8W2+9lREjRpCenk5+fn6Z59q1axe9e/eusO4DBw5QUFBAz549A23p6ekkJCSQkJDAkCFDQr6eqvj44495/fXXWbNmDXfddRdDhgxh586dREdH8/rrrwMwY8YM3n//fXbt2sWZM2d47bXXaNy4Mb/73e/44Q9/yBtvvMHatWuZN28eAFFRUVx77bV8+OGH1VJjKBQKRUREJDTVfa+TiNS6Tz75hJYtW5bbxzkXeJ2Xl8fevXsZOHAg119/PY0bN2bXrl2B7SkpKRQWFjJlypRA29SpU8nNzWXcuHFs3LiR/v37c/bs2UrV+9JLL9GjRw+6dOnCAw88QPPmzQPbUlNTycnJIScnhw0bNoR0PVU1YsQImjRpQnx8PIWFhQwfPhzwj4b6fD4ANmzYQL9+/YiPj+ett95i9+7dAPTo0YNJkyZxxx13sGTJEpo2bRo4brt27Th8+HC11VkRhUIRERERkQgVHR1NQUFBuX22b99OXFwc4A9lJ0+eJDY2lpiYGHw+X7EppFFRUZjZRce4+uqrmTZtGmvWrLkoSAbr0aMH2dnZZdYyfvx4du/ezaZNm3jwwQc5evRoKJdZ5vVUVbNmzQD/dTdp0iRw7VFRUVy4cIGCggJ+9KMfsXLlSnbu3MkPfvCDYp/3zp07admyJXl5ecWOW1BQQHR0dLXUGAqFQhERERGRCNWqVSsKCwvLDIY+n4+0tDRmzpwJ+KeOrl27Fp/Ph8/nIzs7u1goLM3atWsD99cdPXqUEydO0LFjx1L7zpgxg6VLl7Jly5ZA27Jlyy4Kf0lJSUyaNIlFixaFfK0AO3bsYMGCBaSkpIS8T2ZmZqXvQSz6XK+88kq++OILVq5cGdi2evVqTpw4wdtvv839999fbFrt3/72N3r06FGpc1aGVh8VEREREakrQniERHUbNmwYmzdv5rbbbgP898klJiYG
Hkkxc+ZMpk6dis/n48CBA/Tv3z+wb2xsLC1atGDLli3069ev1OOvX7++2FTPp556iquuuqrUvu3btycrK4u0tDSOHTtGVFQUgwcPZsyYMRf1nT17Nr169eLRRx8F/PcULlu2LLD91VdfBWDTpk0kJiZy+vRp2rVrR0ZGRrGVRyuyZ88ebr755pD7B2vZsiU/+MEPiI+PJyYmhr59+wL+abtz5szhzTffpHPnzsyYMYMHHniApUuXkpeXR3R0NB06dKjUOSvDqnNObV3Rp08ft23bttouQ0REpGGZf0Wl/sGa2y2OuD25YShIpP7Lzc2ttqmMlbV9+3YWLlzIiy++WKt11FUjR45k9erVxe75C6f09HRatGjBPffcE/I+pX2PzCzbOdcnlP01UigiIiIiEsESExMZMmQIhYWFgWcVyr8UPeOwprRs2ZJJkybV6DkVCkVEREREIty0adNq9Hzr1q1j9uzZxdpiY2N55ZVXarSOumjq1Kk1fk6FQhERERERqVHJyckkJyfXdhni0eqjIiIiIiIiEUyhUEREREREJIIpFIqIiIiIiEQwhUIREREREZEIpoVmRERERETqiPil8dV6vJ2Td1bY58yZMwwfPpy33nqLgwcPEhcXR7du3QIPr09JSWHy5MnF9hk1ahTHjh3jvffeA/wPqJ83bx7vvvsuZkZhYSG9e/fm2WefpU2bNkyfPp38/HzOnj3LoEGDeO6558qsZ+vWraSlpZGXl4eZMXDgQDIyMnj55Zd56KGH6NixIwUFBUyfPp3U1FQA5s+fz/PPP0/btm0Dx9m4cSM5OTmMGjWKLl26cPr0adq3b8/DDz/MyJEjA/tddtll7N+/n3feeYdz586xf/9+unbtCsDcuXMZO3bspX3oJdx77738+Mc/pnv37mX2yczM5Ktf/WqtrDwKCoUiIiIiIhFtyZIljBkzJvCMwmuuuYbt27cDsG/fPsaMGcOXX34ZCCz5+fl88MEHgTAVGxvLsGHDWLJkCS+88AL33nsvzzzzDH379mXAgAEkJyeTmprKqFGjANi5s+ygmpeXx7hx48jKyiIpKQnnHKtWreLUqVMAjB8/nszMTE6cOEHXrl0ZO3YsnTt3BiA1NZW0tLSLjjlo0KDAswYQJcDQAAAgAElEQVRzcnIYPXo00dHRDB06NNBn8eLFAPh8PkaOHElOTk6VPtNgv/3tbyvsM23aNG6++eZaC4WaPioiIiIiEsGWL18eCGwldenShYULF5KRkRFoW7VqFXfccQcTJkwgKysr0J6ens4TTzzB7t27yczM5MknnwTgyJEjdOrUKdAvPr7s0dDFixczefJkkpKSADAzxo4dS/v27Yv1a9OmDddeey1Hjhy5pGtNSEjg8ccfJzMz85L2K838+fOZPHkyw4YNIyYmhtWrV/Pwww8THx/P8OHDOX/+PAC33HIL27ZtA+Cyyy7jscce48Ybb6R///7k5eUB8JWvfIWYmBi2bt1a5boqI2yh0Myam9lWM/vQzHab2U+89lgz22JmH5nZS2bW1Gtv5r3f622PCTrWI177X81MDzQREREREakG586dY9++fcTExJTZp1evXuzZsyfwfsWKFUycOJGJEyeyYsWKQHuHDh2YNWsWSUlJzJ07l9atWwP+Ebxbb72VESNGkJ6eTn5+fpnn2rVrF717966w7gMHDlBQUEDPnj0Dbenp6SQkJJCQkMCQIUNCvp6q+Pjjj3n99ddZs2YNd911F0OGDGHnzp1ER0fz+uuvX9T/n//8J/379+fDDz9k8ODBPP/884Ftffr0YdOmTdVS16UK50jhWeBW59yNQAIw3Mz6A08C6c6564CTwD1e/3uAk865a4F0rx9m1h2YAPQAhgPPmlmjMNYtIiIiIhIRPvnkE1q2bFluH+dc4HVeXh579+5l4MCBXH/99TRu3Jhdu3YFtqekpFBYWMiUKVMCbVOnTiU3N5dx48axceNG+vfvz9mzZytV70svvUSPHj3o
0qULDzzwAM2bNw9sS01NJScnh5ycHDZs2BDS9VTViBEjaNKkCfHx8RQWFjJ8+HDAPxrq8/ku6t+0adPA/Yy9e/cu1qddu3YcPny42mq7FGELhc7vC+9tE+/HAbcCK732pcBo7/Uo7z3e9qFmZl57lnPurHNuP7AXuClcdYuIiIiIRIro6GgKCgrK7bN9+3bi4uIAfyg7efIksbGxxMTE4PP5ik0hjYqKwv9P+OKuvvpqpk2bxpo1ay4KksF69OhBdnZ2mbWMHz+e3bt3s2nTJh588EGOHj0aymWWeT1V1axZM8B/3U2aNAlce1RUFBcuXLiof3CfRo0aFetTUFBAdHR0tdR1qcJ6T6GZNTKzHOAY8AbwMZDvnCu6+kNAR+91R+AggLf9M6BNcHsp+4iIiIiISCW1atWKwsLCMoOhz+cjLS2NmTNnAv6po2vXrsXn8+Hz+cjOzi4WCkuzdu3awP11R48e5cSJE3TsWPo/52fMmMHSpUvZsmVLoG3ZsmUXhb+kpCQmTZrEokWLQr5WgB07drBgwQJSUlJC3iczM7Na7kGsyN/+9jduuOGGsJ+nNGFdfdQ5VwgkmFlL4BWgtEheNH578X8p+LeV1V6Mmd0H3Afwta99rVL1ioiISAXmXwHzP6vtKkQarFAeIVHdhg0bxubNm7ntttsA/31yiYmJgUdSzJw5k6lTp+Lz+Thw4AD9+/cP7BsbG0uLFi3YsmUL/fr1K/X469evLzbV86mnnuKqq64qtW/79u3JysoiLS2NY8eOERUVxeDBgxkzZsxFfWfPnk2vXr149NFHAf89hcuWLQtsf/XVVwHYtGkTiYmJnD59mnbt2pGRkVFs5dGK7Nmzh5tvvjnk/pX1zjvvMG/evLCfpzRWnXNqyz2R2TzgNDAbuMo5d8HMkoD5zrlkM1vnvX7PzBoDR4G2wBwA59wT3nEC/co6V58+fVzRCj8iIiJSTeZf4f2+tFCY2y2OuD25YShIpP7Lzc2ttqmMlbV9+3YWLlzIiy++WKt11FUjR45k9erVNG3aNGznqOrfoLTvkZllO+f6hLJ/OFcfbeuNEGJm0cBtQC6wASh6AuRkYI33+o/ee7ztbzl/Yv0jMMFbnTQWuA6onbVaRUREREQamMTERIYMGUJhYWFtl1Invfbaa2ENhOBf8GfBggVhPUd5wjl9tAOw1FspNAp42Tn3mpn9H5BlZj8DtgMveP1fAF40s73Ap/hXHMU5t9vMXgb+D7gApHjTUkVEREREpBpMmzatRs+3bt06Zs+eXawtNjaWV155pUbrqCtuv/32Wj1/2EKhc24HkFhK+z5KWT3UOVcAjCvjWD8Hfl7dNYqIiIiISM1LTk4mOVmPH68rwrr6qIiIiNRRRfcHiohIxFMoFBERkdIpOIqIRASFQhERESmfwqGISIMW1ucUioiISB2jgCciIiVopFBEREQUFkXqiNxucdX6E4ozZ87wjW98g8LCQnw+H9HR0SQmJhIXF8dNN93E0qVLL9pn1KhRJCUlBd6vX7+epKQkip6BXlhYSEJCAu+++y5//etfueWWW0hISCAuLo777ruv3Hq2bt3K4MGD6dq1K926dePee+/l9OnT/O53v6Nt27YkJCTQrVs30tPTA/vMnz+fjh07kpCQEPjJz89n48aNXHHFFSQmJtK1a1cGDx7Ma6+9Vmy/p59+mpSUFBISEujevTvR0dGBY6xcuTKkz7CIz+fjD3/4Q5nbb7nlFoqep37bbbdx8uTJSzp+uGikUEREJFLNv+KSH0QvIg3PkiVLGDNmDI0aNQLgmmuuYfv27QDs27ePMWPG8OWXXzJ16lQA8vPz+eCDD7jsssvYv38/sbGxDBs2jCVLlvDCCy9w77338swzz9C3b18GDBhAcnIyqampjBo1CoCdO3eWWUteXh7jxo0jKysrEDJXrVrFqVOnABg/fjyZmZmcOHGCrl27MnbsWDp37gxAamoqaWlpFx1z0KBBgSCYk5PD6NGj
iY6OZujQoYE+ixcvBvyhbuTIkeTk5FTqsywKhXfeeWeFfSdNmsSzzz7LY489VqlzVSeNFIqIiIiIRLDly5cHAltJXbp0YeHChWRkZATaVq1axR133MGECRPIysoKtKenp/PEE0+we/duMjMzefLJJwE4cuQInTp1CvSLj48vs5bFixczefLkwCikmTF27Fjat29frF+bNm249tprOXLkyCVda0JCAo8//jiZmZmXtF9p/vKXvwRGFBMTEzl16hRz5sxh06ZNJCQkkJ6ezpkzZ5gwYQI9e/Zk/PjxnDlzJrD/t7/9bVasWFHlOqqDQqGIiIiISIQ6d+4c+/btIyYmpsw+vXr1Ys+ePYH3K1asYOLEiUycOLFYqOnQoQOzZs0iKSmJuXPn0rp1a8A/gnfrrbcyYsQI0tPTyc/PL/Ncu3btonfv3hXWfeDAAQoKCujZs2egLT09PRDShgwZEvL1VNbTTz/N4sWLycnJYdOmTURHR/PLX/6SQYMGkZOTQ2pqKr/5zW/4yle+wo4dO3jsscfIzs4O7N+qVSvOnj3LiRMnqlxLVSkUioiISMV0z6FIg/TJJ5/QsmXLcvsU3ScI/umde/fuZeDAgVx//fU0btyYXbt2BbanpKRQWFjIlClTAm1Tp04lNzeXcePGsXHjRvr378/Zs2crVe9LL71Ejx496NKlCw888ADNmzcPbEtNTSUnJ4ecnBw2bNgQ0vVUxc0338yPf/xjMjIyyM/Pp3Hji+/Me/vtt7nrrrsA6NmzZ7EQC9CuXTsOHz5cLfVUhUKhiIiIlE1hUKRBi46OpqCgoNw+27dvJy7Ov2jNSy+9xMmTJ4mNjSUmJgafz1dsCmlUVBRmdtExrr76aqZNm8aaNWsuCpLBevToUWw0raTx48eze/duNm3axIMPPsjRo0dDucwyr6cq5syZw29/+1vOnDlD//79yxx9LO3zKFJQUEB0dHSVa6kqhUIRERERkQjVqlUrCgsLywyGPp+PtLQ0Zs6cCfinjq5duxafz4fP5yM7O7tYKCzN2rVrOX/+PABHjx7lxIkTdOzYsdS+M2bMYOnSpWzZsiXQtmzZsovCX1JSEpMmTWLRokUhXyvAjh07WLBgASkpKSHvk5mZWeo9iB9//DHx8fHMnj2bPn36sGfPHi6//PLAojgAgwcPZvny5YB/auyOHTsC25xzHD16tNypuzVFq4+KiIjIxTRCKFIr4vbk1vg5hw0bxubNm7ntttsAf9hJTEykoKCAyy+/nJkzZzJ16lR8Ph8HDhygf//+gX1jY2Np0aIFW7ZsoV+/fqUef/369cWmej711FNcddVVpfZt3749WVlZpKWlcezYMaKiohg8eDBjxoy5qO/s2bPp1asXjz76KOC/p3DZsmWB7a+++ioAmzZtIjExkdOnT9OuXTsyMjKKrTxakT179nDzzTdf1P7rX/+aDRs20KhRI7p3786IESOIioqicePG3HjjjUyZMoUf/vCHTJ06lZ49e5KQkMBNN90U2D87O5v+/fuXOu20pll1zamtS/r06eOKnv8hIiIiQUqGvaJHUhQ9niL4d5nHuLTHWOR2i6uVf+iK1Ae5ubnVMpWxKrZv387ChQt58cUXa7WOumrkyJGsXr2apk2bVutxH3jgAb797W9fUkAtS2nfIzPLds71CWX/2o+lIiIiIiJSaxITExkyZAiFhYWBZxXKvwQ/7L463XDDDdUSCKuD7ikUERGRS1fWSKKmnYrUS9OmTavRQLhu3brA4yOKfr7zne/U2Pnrgh/84Ae1XUKARgpFREQauqKgdonTPis8nohIJSUnJ5OcnFzbZYhHI4UiIiKRIhxhbv4VCokiVdQQ1/iQmlMd3x+FQhERESlOIU+kxjRv3pwTJ04oGEqlOOc4ceJEYGXXytL0URERkUinEChSazp16sShQ4c4fvx4bZci9VTz5s3p1KlTlY6hUCgiIiKVozApUmVNmjQhNja2tsuQCKfpoyIiIiIiIhFM
oVBERERERCSCKRSKiIiIiIhEMIVCERERERGRCKZQKCIiItVDC8+IiNRLCoUiIiIiIiIRLGyh0Mw6m9kGM8s1s91m9oDXPt/M/mFmOd7PN4P2ecTM9prZX80sOah9uNe218zmhKtmERGRiFNdo3saJRQRqbfC+ZzCC8CDzrkPzOxyINvM3vC2pTvnng7ubGbdgQlAD+Bq4M9mdr23eTFwO3AIeN/M/uic+78w1i4iIiIiIhIRwhYKnXNHgCPe61Nmlgt0LGeXUUCWc+4ssN/M9gI3edv2Ouf2AZhZltdXoVBERKQiGsETEZEK1Mg9hWYWAyQCW7ymGWa2w8yWmFkrr60jcDBot0NeW1ntJc9xn5ltM7Ntx48fr+YrEBERiQAKkCIiESnsodDMLgNWAbOcc58DvwGuARLwjyT+qqhrKbu7ctqLNzj3nHOuj3OuT9u2bauldhERkXpNIU9EREIQznsKMbMm+APhcufcagDnXF7Q9ueB17y3h4DOQbt3Ag57r8tqFxERERERkSoI5+qjBrwA5DrnFga1dwjq9h1gl/f6j8AEM2tmZrHAdcBW4H3gOjOLNbOm+Bej+WO46hYREWkQNEooIiIhCudI4c3AJGCnmeV4bY8CE80sAf8UUB8wHcA5t9vMXsa/gMwFIMU5VwhgZjOAdUAjYIlzbncY6xYREREREYkY4Vx9dDOl3w/4p3L2+Tnw81La/1TefiIiIiIiIlI5NbL6qIiIiEQQTV0VEalXFApFREREREQimEKhiIiIiIhIBFMoFBERaWg0fVNERC6BQqGIiIiIiEgEUygUERERERGJYAqFIiIiIiIiESykUGhmN4S7EBEREakC3UcoIiKVFOpI4f8zs61m9iMzaxnWikRERCQ0CoIiIlINQgqFzrmBwPeBzsA2M/uDmd0e1spERESk/lJgFRGpN0K+p9A59xEwF5gNfAPIMLM9ZjYmXMWJiIiIiIhIeIV6T2FPM0sHcoFbgTucc3He6/Qw1iciIiIiIiJhFOpIYSbwAXCjcy7FOfcBgHPuMP7RQxEREakNmqYpIiJV1DjEft8EzjjnCgHMLApo7pw77Zx7MWzViYiIiIiISFiFOlL4ZyA66P1XvDYRERGpKzRqKCIilRBqKGzunPui6I33+ivhKUlERERERERqSqih8J9m1qvojZn1Bs6EpyQRERERERGpKaHeUzgL+C8zO+y97wCMD09JIiIickk0bVRERKogpFDonHvfzLoBXQED9jjnzoe1MhEREREREQm7kB9eD/QFegKJwEQzuzs8JYmIiEiZNCooIiLVLKSRQjN7EbgGyAEKvWYH/D5MdYmIiIiIiEgNCPWewj5Ad+ecC2cxIiIiIiIiUrNCnT66C7gqnIWIiIiIiIhIzQt1pPBK4P/MbCtwtqjROfftsFQlIiIiIiIiNSLUUDg/nEWIiIjIJdKCMyIiUk1Cmj7qnPsL4AOaeK/fBz4obx8z62xmG8ws18x2m9kDXntrM3vDzD7yfrfy2s3MMsxsr5ntMLNeQcea7PX/yMwmV/JaRUREGj6FRRERuUQhhUIz+wGwEvgPr6kj8GoFu10AHnTOxQH9gRQz6w7MAd50zl0HvOm9BxgBXOf93Af8xjt3a2Ae0A+4CZhXFCRFRERERESkakJdaCYFuBn4HMA59xHQrrwdnHNHnHMfeK9PAbn4w+QoYKnXbSkw2ns9Cvi98/tfoKWZdQCSgTecc586504CbwDDQ6xbREREREREyhFqKDzrnDtX9MbMGuN/TmFIzCwG/0PvtwDtnXNHwB8c+Ve47AgcDNrtkNdWVruIiIiIiIhUUaih8C9m9igQbWa3A/8F/HcoO5rZZcAqYJZz7vPyupbS5sppL3me+8xsm5ltO378eCiliYiI1B+6V1BERMIk1FA4BzgO7ASmA38C5la0k5k1wR8IlzvnVnvNed60ULzfx7z2Q0DnoN07AYfLaS/GOfecc66Pc65P27ZtQ7wsERERERGR
yBbq6qNfOueed86Nc86N9V6XO33UzAx4Ach1zi0M2vRHoGgF0cnAmqD2u71VSPsDn3nTS9cBw8yslbfAzDCvTURERERERKoopOcUmtl+Spmy6ZzrUs5uNwOTgJ1mluO1PQr8EnjZzO4BDgDjvG1/Ar4J7AVOA1O9c3xqZgvwPwYD4KfOuU9DqVtERKRB0lRSERGpRqE+vL5P0Ovm+INc6/J2cM5tpvT7AQGGltLf4V/ltLRjLQGWhFSpiIhIQ6MQKCIiYRTq9NETQT//cM79Grg1zLWJiIiIiIhImIU6fbRX0Nso/COHl4elIhEREREREakxoU4f/VXQ6wuAD/hetVcjIiISieZfAfM/q+0qREQkQoUUCp1zQ8JdiIiISERTMBQRkVoS6vTRH5e3vcQjJ0RERERERKSeuJTVR/vif5YgwB3A28DBcBQlIiIipdAqpCIiEgahhsIrgV7OuVMAZjYf+C/n3L3hKkxERERERETCL6RHUgBfA84FvT8HxFR7NSIiIiIiIlKjQg2FLwJbzWy+mc0DtgC/D19ZIiIiESp4iqimi4qISA0IdfXRn5vZ/wCDvKapzrnt4StLREREREREakKoI4UAXwE+d84tAg6ZWWyYahIREREREZEaElIo9KaMzgYe8ZqaAMvCVZSIiIh4NIVURETCLNSRwu8A3wb+CeCcOwxcHq6iREREREREpGaEGgrPOecc4ADM7KvhK0lERCRCaVRQRERqQaih8GUz+w+gpZn9APgz8Hz4yhIREYlwCogiIlJDQl199Gkzux34HOgKPO6ceyOslYmIiEQqBUIREalBFYZCM2sErHPO3QYoCIqIiIiIiDQgFU4fdc4VAqfNTP9tKSIiIiIi0sCENH0UKAB2mtkbeCuQAjjn7g9LVSIiIiIiIlIjQg2Fr3s/IiIiUp10/6CIiNSyckOhmX3NOXfAObe0pgoSERERERGRmlPRPYWvFr0ws1VhrkVERERERERqWEWh0IJedwlnISIiIiIiIlLzKgqFrozXIiIiIuXT/ZIiIvVCRaHwRjP73MxOAT2915+b2Skz+7wmChQREWmwFJqknsntFlfbJYhIGJQbCp1zjZxzLZxzlzvnGnuvi963qKkiRURERKTuUDgUaVgqfHh9ZZnZEjM7Zma7gtrmm9k/zCzH+/lm0LZHzGyvmf3VzJKD2od7bXvNbE646hUREREREYlEYQuFwO+A4aW0pzvnEryfPwGYWXdgAtDD2+dZM2tkZo2AxcAIoDsw0esrIiIiIrVMI4YiDUPYQqFz7m3g0xC7jwKynHNnnXP7gb3ATd7PXufcPufcOSDL6ysiIiIiNaBk8CsvCCokitRP4RwpLMsMM9vhTS9t5bV1BA4G9TnktZXVLiIiUn9pgRlpABQARRqOmg6FvwGuARKAI8CvvHYrpa8rp/0iZnafmW0zs23Hjx+vjlpFREREpAIKhyL1X42GQudcnnOu0Dn3JfA8/umh4B8B7BzUtRNwuJz20o79nHOuj3OuT9u2bau/eBEREZEIcinTRivTX0TqjhoNhWbWIejtd4CilUn/CEwws2ZmFgtcB2wF3geuM7NYM2uKfzGaP9ZkzSIiIiISOoVBkfonnI+kWAG8B3Q1s0Nmdg/w72a208x2AEOAVADn3G7gZeD/gLVAijeieAGYAawDcoGXvb71UvzS+NouQURERKRU1bmATFF/BUSR+qFxuA7snJtYSvML5fT/OfDzUtr/BPypGkurM+KXxrNz8s7aLkNERESkVNUR6nK7xRG3J7caqhGRcKmN1UelDBpJFBERkZpUWuirTBAMZR+NGorUXQqFtSA4/CkIiohEGD2OQmpJKFM6Ne1TJDIpFNYShUERERGpKxQCRSKbQmENCzUMKjSKiDQwGiGUOqomA6HCp0jdpFBYBygAiog0YMFhUMFQIpTCoEjdplBYBykkioiISJHKBqr6EMTqQ40ikUChUERERKQOCg5M1RWewnFMEan/FArr
iNJGB+OXxmvUUEREJEJcSkgra5XQ3G5xdf7xEKHWKCI1J2wPr5fKUQgUERGRqqoPI4J1tS6RSKSRwnpAQVFEpJ7SwjIRr6JnAlYlGDWkZwo2hGsQqc8UCkVERMJN4VBEROowTR8VERERqQG53eKI25N7yfuU976y7XVJfahRpKHTSKGIiIhIDaooBCkkiUhNUygUERERCbNQg14kB8JIvnaR2qZQWE9osRkREZG6L9TFXyoTgBSaRCRcFApFREREqqCqYa20VUgjOQDqOYYiNU+hsB4pGi3UQ+1FROowrTQqJdSHZwaKSGRTKBQREQmHisKhwmO9VtHzByvqI6XTZyZSOxQKRUREqpsCX0QInuaoMCMi9ZlCYT2jaaMiIiJ1k4KhiNRXCoX1nEKiSMMUM+f1enlskYZOi6CISEOkUCgiUsMUykREQqMALlIzFAojkEYXReqmssLipYTI4L6l7VeyTQG1Guk+wgZPAUVEGiqFwgZKwU+k/isvsCnM1SHBYVDBsF4pK+Qp/NU9+puIhJdCYT1WVvBTIBRpOCoKf6GGw1BGDUUiVWkriGplURGJJAqFDYjCoEjti5nzetjDVtHxS/4urU9VzyEV0MhgvVMy4FUU+BQMa58+e5HwC1soNLMlZnbMzHYFtbU2szfM7CPvdyuv3cwsw8z2mtkOM+sVtM9kr/9HZjY5XPXWd0WBsGQwrCgoKkiK1JySgfFSwlx5AfBS+pR1ntLqqqg+BUdpiBRA6g79LURqTjhHCn8HDC/RNgd40zl3HfCm9x5gBHCd93Mf8Bvwh0hgHtAPuAmYVxQkJXShBj8FRJHwqexiMTVx3vKCXmWmr0ZMWNS9hPVCqPcNKoCISCQLWyh0zr0NfFqieRSw1Hu9FBgd1P575/e/QEsz6wAkA2845z51zp0E3uDioClluJSQp0AoUjsuZbppZcNWdYS0yo5wVrWmiAmYUiMU/ERESlfT9xS2d84dAfB+t/PaOwIHg/od8trKar+Imd1nZtvMbNvx48ervfD6Ln5pfOCntG0iUnXV8UiJcKpqqKyOqaOafio1SauLioiEpq4sNGOltLly2i9udO4551wf51yftm3bVmtxkUyBUaR0oY7w1VTwqe7RwKruV9piOA3iERuaJlpvKQiKiJStpkNhnjctFO/3Ma/9ENA5qF8n4HA57VINquORFsF9FSBFKq/ehKJLcCkL39RpJYPg/CsUDusZBcKG4/9v7/5DLSnrOI5/vrhpURuuaWbq5gZLXuFSbYttCFKZq2W4BQlW5MWCpVKq/7olFGTBWtAfViRGSytYtvQDL+yablFEkKaVddW75m3d9KKkZZkhFVvf/jjP0dmzM3Nm5szveb9guHOe+XGfc8/D2fns88wzfJZANeoOhUuSxjOILki6NVJ+RZiFdJukp8Pw0tslbTezDWGCme2hDDUg5GGIsgwBzTIEso5HU7RN0fsE41638u9HEOwEHiHRX3y2QHXWVXViM/uOpDdLOtnM1jSaRXSXpL1m9iFJj0i6LOy+X9I7JK1KelbSlZLk7k+Z2bWS7g77fc7dJyevQYmS7jlcXljOtC/QF2ct7tPhXZdk3hcjs0w4k2V4adbPBAAAZFdZKHT39yZsuiBmX5d0VcJ5dkvaXWLVUEBSMIxuB/omTzBEPQiHyIKeJADIpy0TzWAGRQJZ0RAXN4MpgRBdlGeoY9bJUtAMPhNgeFbOniP8AyUiFKJUBEQgG4LMbGr5+3EPYecQEgCgGEIhMis6KynQdnGPT0B75J3AptTPj2DYGQRCACiOUIhC0kIfgRBtRuBDJoTBVpsMgARC0AaA2VQ20QwAtA2BsF/SJgKa/KwP77rkqLLUiWoIhJ1CGBi2lbPnNHdwpelqAJ1HTyEqRa8h2iDLswfRTWV8hs+dgzAIABgoQiEaRWhE0wiG/cDnCAwXD7UHZkcoRC3iHmUBVI0eQiRJnJSG3kIAwAARClE7wiHqkHcmSoIi0D30DAFA
OQiFqMw4/GUJgQRFlCku4BH6MIk20W0EQsShXQDFMPsoYlUd0giBAKpQeMgww0YBAANGKATQGXGPINapSmUAAA0YSURBVBhf7I/L6f1BmsMvfJ8k6ax/fbvhmiAveoAAoDoMH0Xr0IuIIs5a3EcgRCHjoAigH/gPBCA/QiFahUAIoA7jIEgg7AYu8gGgWoRCNI4giKLoGURdaGsz4H5NAGg9QiFaYzIcEhaRhAt0ZHH4he+jJxAYKB5oD+TDRDOoVd6gN79nXssLyxXVBl1CEERRZQRDwiUAoM/oKUQr0UuIaY8WICRiVgQ9oN/oJQSyIxSi9QiIw0Xwg5Q/vDFstB9Wzp7joh4AakIoRGcQDoeJR01gbFrQIwj2E8EQs6INAdMRCtEJBML+iQa9ydBHCMQ09Ab2FxfwqALtCkhHKETnjAMiQbF/6BVEFpNhkHAIAMBsCIXolKQgSEAEhoEA2G/05gBAMwiF6KQs4ZCg2C30EGISAXBYooGQcAgA9SIUojcIge2V9hgJwiCySAqI43ICJIBpmNEWSNZIKDSzw2a2bGb3mtk9oewkMztgZg+FnxtCuZnZ9Wa2ama/N7MtTdQZ3UJAbK/ofYNlBsL1c4tTt62fW4zdL+3YJnShjlUj5A0HF+qoG20OOFaTPYVvcffXufvW8HpR0k/cfbOkn4TXkvR2SZvDslPS12uvKVorS/ib3zNPSKxZHT2C0ZCUtB73umu6Xv+6ESa7hQtzAGiHNg0f3SFpT1jfI+ldkfKbfOROSSea2WlNVBDdkBYACYfVK+tvHO3Vm+zpyxKU0sLh5HnjfmeVsvRqDgFDP4eFAAgA7dVUKHRJd5jZr81sZyg71d0fl6Tw8+Wh/HRJj0aOXQtlQC4EwnrlGSKa1tNX5NgioTHP9iLBrcj58vyeMs7RpGgwTFpHP4yH7hES0bRxG6QtAs2FwvPcfYtGQ0OvMrPzU/a1mDI/ZieznWZ2j5nd8+STT5ZVT/TQeDgpzzssJs8Mr0mBME+PX5OKDkvN897KCH5l7V+HtJCX9ED6+U0bq6wSgAEjEAIjjYRCd38s/HxC0g8lnSvpz+NhoeHnE2H3NUlnRg4/Q9JjMee80d23uvvWU045pcrqo0OyBj6CYXFxf7siE6XEDReNrrdh8pXJsJc1KKYNVy1yrskl6fdlqXcVf8Np56T3DwCAdqk9FJrZi81s/Xhd0nZJ90lakrQQdluQdGtYX5J0RZiFdJukp8fDTAFptkBHGCwmy0Qy00Je0usy5RlGWlav4OR6Ge+3SG9iGUNoy6zPWFyvX1IZPYQAANSjiZ7CUyX9wsx+J+lXkva5+48k7ZJ0oZk9JOnC8FqS9ks6JGlV0jckfbT+KmMoJkMioTFZ9G+Tdv9gG4cwVqGM91l1SEsK6rPWY5ahwGX0GhIeAQCYzbq6f6G7H5L02pjyv0q6IKbcJV1VQ9WA5/Q1DI7f1/LC8jHlywvLz/1MOjZpWxX3z6E+6+cW9czKrpk/n+jx47A3r42J558Mc/ObNmr54Udizz25b9J+aDfu30JbrZw9p7mDK8esA0PRpkdSAK3U94BYZF/CXf9knY016+NB0oZ/zm/amNpDmKXnL22fotsADBuz4mLICIVAMG020uiMpW1WdAjs5KyicX+PLrx/lGfaxDp5zxUV10uYVh4nGjyn3YM4ub2pcEgoBQC0EaEQKKDr4Sgt6HX9vZWtql5RelurNQ6B03os415Phs0sxxapX9JrgiPQPHoNMTSEQiCDtKDU5hAV7e2Lq2eb645mDSm0FgmN047N87sJhED7ERDRd4RCoMPShrpWeX6005CC3CziQljW+xCnHTtZFtfrmCWEEg4BAHUiFAIF1R2Y0oZ5cv8fUI4ywljVgY7ACNSHYaQYCkIh0EJJ4Y6ghyZ0qQeyyQlkyp71dJb3QnAEykc4RJ8RCoGcsgSzqsMbQRFov7J6HafdyzgtkAIoD8EQfUUoBGYwbSKXuH2znCvtnIRA
YHjiQl+e5zkSGoHyEAzRR4RCoGRZ7u+rYoIYwiJQnbaEqmmPssgzYU6eZ0ICSEdQRNcRCoGKlfE4CwIf0D9MSAN0FxPQoG/WNV0BoE/yPAg+rkcRAMqW9qiLpNfLDz9SfcWAniAcog/oKQRqRAAEULUyH6vBEFMgHYEQfUEoBGpGMATQBUn3IBIMgXgERHQZoRAAACQiBAL5cL8huohQCAAAcuP5iMCxCIPoKkIhAADIJW3yGgAj9BiiSwiFAABgJoREINk4HE4GRAIj2oRQCAAAZsZwUiC7cSCc/Ak0hVAIAAAA1CQpABIM0SRCIQAAKBW9hkC8uOAXLWOIKZpCKAQAAABaIikYEhBRpXVNVwAAAADA8wiAqBs9hQAAAECLMTENqkYoBAAAleC+QqB80WCYdj8ikAehEAAAAOioaDhMC4r0MiJNZ0KhmV1sZg+a2aqZLTZdHwAAAKCNkoJi2jBUQuOwdWKiGTM7TtLXJF0oaU3S3Wa25O4PNFszAAAAoHvyBMO5gytHlc0dXDnmXJNl6JZOhEJJ50padfdDkmRmt0jaIYlQCABAi81v2qi9OtJ0NQDMIMvzE8fBMG7bODBOC5uT+8YFzfH+hNBydSUUni7p0cjrNUlvbKguAAAAACYkDT3NU5518pwyh7mmBdGh9IJ2JRRaTJkftYPZTkk7w8t/mtmDlddq5GRJf6npd2GYaGOoGm0MlTqHNoZ60M5QjFn8+rFlXWtjr8q6Y1dC4ZqkMyOvz5D0WHQHd79R0o11VkqSzOwed99a9+/FcNDGUDXaGKpGG0MdaGeoWp/bWFdmH71b0mYz22Rmx0u6XNJSw3UCAAAAgM7rRE+hux8xs6sl3S7pOEm73f3+hqsFAAAAAJ3XiVAoSe6+X9L+pusRo/Yhqxgc2hiqRhtD1WhjqAPtDFXrbRszd5++FwAAAACgl7pyTyEAAAAAoAKEwhRmdq2Z/d7M7jWzO8zslaHczOx6M1sN27dEjlkws4fCshApf4OZLYdjrjeLm+8WQ2NmXzKzg6Ed/dDMToxs+1RoLw+a2UWR8otD2aqZLUbKN5nZXaHtfTdMyoSBM7PLzOx+M/ufmW2d2EYbQ+WS2hMwjZntNrMnzOy+SNlJZnYgfA8dMLMNoTz3tRlgZmea2U/NbCX8W/nxUD68dubuLAmLpJdG1j8m6Yaw/g5Jt2n0/MRtku4K5SdJOhR+bgjrG8K2X0l6UzjmNklvb/r9sTS/SNouaV1Yv07SdWH9HEm/k3SCpE2S/qjRJEvHhfVXSzo+7HNOOGavpMvD+g2SPtL0+2NpfpE0J+k1kn4maWuknDbGUvmS1p5YWKYtks6XtEXSfZGyL0paDOuLkX83c1+bsbBIOk3SlrC+XtIfwr+Pg2tn9BSmcPd/RF6+WNL4Bswdkm7ykTslnWhmp0m6SNIBd3/K3f8m6YCki8O2l7r7L33Ucm6S9K763gnayt3vcPcj4eWdGj2DUxq1sVvc/d/u/rCkVUnnhmXV3Q+5+38k3SJpR+h5fquk74Xj94g2BknuvuLuD8Zsoo2hDrHtqeE6oSPc/eeSnpoo3qHR94909PdQrmuz6muPLnD3x939N2H9GUkrkk7XANsZoXAKM/uCmT0q6f2SPhOKT5f0aGS3tVCWVr4WUw5EfVCj/32S8rexl0n6eyRg0sYwDW0MdUhqT0BRp7r749Logl7Sy0N53u804Chmdpak10u6SwNsZ515JEVVzOzHkl4Rs+kad7/V3a+RdI2ZfUrS1ZI+q1GX8SQvUI4BmNbGwj7XSDoi6ebxYTH7u+L/I4c2NnBZ2ljcYTFltDGUjXaDunANhsLM7CWSvi/pE+7+j5SpP3rbzgYfCt39bRl3/bakfRqFwjVJZ0a2nSHpsVD+5onyn4XyM2L2xwBMa2PhZuR3SrogDC+WktuYEsr/otEQhnWhJ4c2NiA5vseiaGOoQ1o7A4r4s5md5u6Ph2F7T4TyvNdmgCTJzF6gUSC82d1/
EIoH184YPprCzDZHXl4q6WBYX5J0RZiBaJukp0PX8u2StpvZhjBL0XZJt4dtz5jZtnBfzhWSkv73HgNiZhdL+qSkS9392cimJUmXm9kJZrZJ0maNJiu6W9LmMAvk8ZIul7QUwuRPJb0nHL8g2hjS0cZQh9j21HCd0G1LGn3/SEd/D+W6Nqu70mincF3+TUkr7v7lyKbBtbPB9xROscvMXiPpf5L+JOnDoXy/RrMPrUp6VtKVkuTuT5nZtRr9IyhJn3P38Q3SH5H0LUkv0ui+sfG9Yxi2r2o0++OBMFThTnf/sLvfb2Z7JT2g0bDSq9z9v5JkZldr9EVznKTd7n5/ONcnJd1iZp+X9FuNvuQwcGb2bklfkXSKpH1mdq+7X0QbQx3c/UhKewJSmdl3NOp9OdnM1jQarbVL0l4z+5CkRyRdFnYvcm0GnCfpA5KWzezeUPZpDbCd2fOj1QAAAAAAQ8PwUQAAAAAYMEIhAAAAAAwYoRAAAAAABoxQCAAAAAADRigEAAAAgAEjFAIAAADAgBEKAQAAAGDACIUAAAAAMGD/B/Pl+Tfp9s0uAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 1080x360 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "creditdays_hist.drop(\"SK_ID_CURR\", axis = 1).plot.hist(\n",
    "    bins = 1000, title = \"Histogram of average credit days\", figsize = (15, 5) )\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\pandas\\core\\reshape\\merge.py:543: UserWarning: merging between different levels can give an unintended result (1 levels on the left, 2 on the right)\n",
      "  warnings.warn(msg, UserWarning)\n",
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\pandas\\core\\generic.py:3108: PerformanceWarning: dropping on a non-lexsorted multi-index without a level parameter may impact performance.\n",
      "  obj = obj._drop_axis(labels, axis, level=level, errors=errors)\n"
     ]
    }
   ],
   "source": [
    "df_feat = merge_feature(df_feat,creditdays_hist, rename = {\"SK_ID_BUREAU\" : \"count_bureau\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SK_ID_CURR</th>\n",
       "      <th>train_test</th>\n",
       "      <th>count_bureau</th>\n",
       "      <th>no_bureau</th>\n",
       "      <th>large_bureau_count</th>\n",
       "      <th>(DAYS_CREDIT, mean)</th>\n",
       "      <th>(DAYS_CREDIT, max)</th>\n",
       "      <th>(DAYS_CREDIT, min)</th>\n",
       "      <th>(DAYS_CREDIT, std)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>100002</td>\n",
       "      <td>train</td>\n",
       "      <td>8.0</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>-874.00</td>\n",
       "      <td>-103.0</td>\n",
       "      <td>-1437.0</td>\n",
       "      <td>431.451040</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>100003</td>\n",
       "      <td>train</td>\n",
       "      <td>4.0</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>-1400.75</td>\n",
       "      <td>-606.0</td>\n",
       "      <td>-2586.0</td>\n",
       "      <td>909.826128</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>100004</td>\n",
       "      <td>train</td>\n",
       "      <td>2.0</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>-867.00</td>\n",
       "      <td>-408.0</td>\n",
       "      <td>-1326.0</td>\n",
       "      <td>649.124025</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>100006</td>\n",
       "      <td>train</td>\n",
       "      <td>0.0</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>100007</td>\n",
       "      <td>train</td>\n",
       "      <td>1.0</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>-1149.00</td>\n",
       "      <td>-1149.0</td>\n",
       "      <td>-1149.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   SK_ID_CURR train_test  count_bureau  no_bureau  large_bureau_count  \\\n",
       "0      100002      train           8.0      False               False   \n",
       "1      100003      train           4.0      False               False   \n",
       "2      100004      train           2.0      False               False   \n",
       "3      100006      train           0.0       True               False   \n",
       "4      100007      train           1.0      False               False   \n",
       "\n",
       "   (DAYS_CREDIT, mean)  (DAYS_CREDIT, max)  (DAYS_CREDIT, min)  \\\n",
       "0              -874.00              -103.0             -1437.0   \n",
       "1             -1400.75              -606.0             -2586.0   \n",
       "2              -867.00              -408.0             -1326.0   \n",
       "3                  NaN                 NaN                 NaN   \n",
       "4             -1149.00             -1149.0             -1149.0   \n",
       "\n",
       "   (DAYS_CREDIT, std)  \n",
       "0          431.451040  \n",
       "1          909.826128  \n",
       "2          649.124025  \n",
       "3                 NaN  \n",
       "4                 NaN  "
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_feat.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SK_ID_CURR</th>\n",
       "      <th>DAYS_CREDIT</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>218508</th>\n",
       "      <td>405791</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1021119</th>\n",
       "      <td>453003</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         SK_ID_CURR  DAYS_CREDIT\n",
       "218508       405791            0\n",
       "1021119      453003            0"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Let the one with longest credict days take charge\n",
    "longest_creadit = df_bureau.sort_values(\"DAYS_CREDIT\", ascending = False).groupby(\"SK_ID_CURR\").head(1)[[\"SK_ID_CURR\", col]]\n",
    "longest_creadit.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SK_ID_CURR</th>\n",
       "      <th>train_test</th>\n",
       "      <th>count_bureau</th>\n",
       "      <th>no_bureau</th>\n",
       "      <th>large_bureau_count</th>\n",
       "      <th>(DAYS_CREDIT, mean)</th>\n",
       "      <th>(DAYS_CREDIT, max)</th>\n",
       "      <th>(DAYS_CREDIT, min)</th>\n",
       "      <th>(DAYS_CREDIT, std)</th>\n",
       "      <th>DAYS_CREDIT</th>\n",
       "      <th>DAYS_CREDIT_mainb</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>100002</td>\n",
       "      <td>train</td>\n",
       "      <td>8.0</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>-874.00</td>\n",
       "      <td>-103.0</td>\n",
       "      <td>-1437.0</td>\n",
       "      <td>431.451040</td>\n",
       "      <td>-103.0</td>\n",
       "      <td>-103.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>100003</td>\n",
       "      <td>train</td>\n",
       "      <td>4.0</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>-1400.75</td>\n",
       "      <td>-606.0</td>\n",
       "      <td>-2586.0</td>\n",
       "      <td>909.826128</td>\n",
       "      <td>-606.0</td>\n",
       "      <td>-606.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>100004</td>\n",
       "      <td>train</td>\n",
       "      <td>2.0</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>-867.00</td>\n",
       "      <td>-408.0</td>\n",
       "      <td>-1326.0</td>\n",
       "      <td>649.124025</td>\n",
       "      <td>-408.0</td>\n",
       "      <td>-408.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   SK_ID_CURR train_test  count_bureau  no_bureau  large_bureau_count  \\\n",
       "0      100002      train           8.0      False               False   \n",
       "1      100003      train           4.0      False               False   \n",
       "2      100004      train           2.0      False               False   \n",
       "\n",
       "   (DAYS_CREDIT, mean)  (DAYS_CREDIT, max)  (DAYS_CREDIT, min)  \\\n",
       "0              -874.00              -103.0             -1437.0   \n",
       "1             -1400.75              -606.0             -2586.0   \n",
       "2              -867.00              -408.0             -1326.0   \n",
       "\n",
       "   (DAYS_CREDIT, std)  DAYS_CREDIT  DAYS_CREDIT_mainb  \n",
       "0          431.451040       -103.0             -103.0  \n",
       "1          909.826128       -606.0             -606.0  \n",
       "2          649.124025       -408.0             -408.0  "
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_feat = merge_feature(df_feat, longest_creadit, suffixes = (\"\", \"_mainb\"))\n",
    "df_feat.head(3)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### CREDIT_DAY_OVERDUE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "With 305811 customers, 301947 have 0-day overdue.\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAA34AAAE/CAYAAAAZshH0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3XuYHFWd//H3NxcJV4FwEUlguBkBRS4RdVGXoCiibsJtJboQAwuygsKPVQw8KLiwEnZRwAd0DcstuCZEXBSJoghBwRUkQFa5KUGDxEASEy7BhADh+/ujamY7w1x6ku70pOb9ep55puvU6apT1Wd65jPnVHVkJpIkSZKk6hrU6gZIkiRJkprL4CdJkiRJFWfwkyRJkqSKM/hJkiRJUsUZ/CRJkiSp4gx+kiRJklRxBj9J6oci4qGIOLDV7WiliDgsIp6MiBciYp9Wt2ddiIhrIuL88vF7IuJ3a7m9jIhdG9O6/i0i7oiIf2x1OySpvzL4SdI6FhHzIuL9nco+GRF3tS9n5p6ZeUcv22kr/7Af0qSmttpFwCmZuUlmPtDqxqxrmXlnZo5qX+6q30iSVC+DnySpS/0gUO4IPNTiNqyxfnD+KsnzKklrxuAnSf1Q7ehOROwfEbMj4vmIWBgRXyur/aL8/mw5HfJdETEoIs6OiCciYlFETI2I19ds99hy3ZKI+GKn/ZwbETdExLcj4nngk+W+fxURz0bEUxFxWUS8rmZ7GRGfjojHImJZRJwXEbuUz3k+ImbU1u90jF22NSI2iIgXgMHA/0bE4908v+59R8QWEXFzRCyOiGfKxyPKdVtGxPyI+Gi5vElEzI2IY7vZ75YRcXVELCi39f2y/MByO1+IiKeBq8vyj0TEnPIc/k9E7FWzrX0i4v6y/dcDw2rWHRgR88vH1wE7AD8sX+szumnb58vXaUFEHNdp3Ycj4oHy3DwZEefWrJsZEZ/pVP83ETEuCheXr9FzZflbutn/GyPipohYWp7DE2rKV0TElp2O/S8RMbRcPi4iHinP6U8iYsdOr/XJEfEY8FhZdnBEPFq26TIgauqfGxHfrllebXS87GdXlufqzxFxfkQM7uqYJKkqDH6S1P9dClyamZsBuwAzyvL3lt83L6dD/gr4ZPk1BtgZ2AS4DCAi9gC+AXwC2A54PbB9p32NBW4ANgf+C1gF/D9gK+BdwPuAT3d6ziHAfsA7gTOAKeU+RgJvAcZ3c1xdtjUzV2bmJmWdt2XmLt2fmrr3PYgiiO1IEaBWtJ+XzFwKHAdcERHbABcDczJzajf7vA7YCNgTaK/f7g3AluV+ToyIfYGrgE8Bw4FvATeV4fZ1wPfL7W0JfBc4oqsdZuYxwJ+Aj5av9b91rhMRhwCfAw4GdgM6Twv9K3AsxWv7YeCfImJcue5a4B9qtvU2ir7xI+ADFH3tTeVzPwYs6ebcTAPmA28EjgS+EhHvy8wFwK86Hd/HgRsy8+WyHWcBhwNbA3eW26o1DngHsEdEbAV8Dzibom8+DhzQTZu6ci3wCrArsE95jF4fKKnSDH6S1BrfL0eAno2IZykCWXdeBnaNiK0y84XMvLuHup8AvpaZf8jMF4AzgaPLkY4jgR9m5l2Z+RLwJSA7Pf9Xmfn9zHw1M1dk5n2ZeXdmvpKZ8yiCy992es6Fmfl8Zj4EPAj8tNz/c8CPKf6w7mtb61XXvjNzSWZ+LzOXZ+Yy4F9rjyMzf0oRvG6jCEWf6mpnEbEd8CHgpMx8JjNfzsyf11R5FTinDK8rgBOAb2XmPZm5KjOvBVZSBNV3AkOBS8rt3ADc24dj7+zvgasz88HM/Ctwbu3KzLwjM39bvra/oQhW7efgB8BuEbFbuXwMcH3ZT14GNgXeDERmPpKZT3VxbkYC7wa+kJkvZuYc4D/LbQF8hzKIR0QAR5dlUJzvC8ptvwJ8Bdi7dtSvXL+0PK+HAg9n5g2Z+TJwCfB0
PScpIraleA1Py8y/ZuYiivB+dD3Pl6T1lcFPklpjXGZu3v7Fa0fRah1PMdryaETcGxEf6aHuG4EnapafAIYA25brnmxfkZnLee3IzZO1CxHxpnJa5NNRTP/8CsUIS62FNY9XdLG8CV3rqa31qmvfEbFRRHwrimmlz1NMk9280/S+KRSjhFdnZncjWiOBpZn5TDfrF2fmizXLOwL/3Cnkj6Q49jcCf87M2vBdez76arXXt/O2IuIdETEriumuzwEnUb6WmbmSYiT5HyJiEEVAu65cdzvF6OjlwMKImBIRm3Wz/6VlsK5tQ/uo8g3AuyLijRQjiEkxsgfFebq05hwtpZi6WTsiXXtsnftydlrfkx0pAvdTNfv7FsXorSRVlsFPkvq5zHwsM8dT/GF6IXBDRGzMa0frABZQ/GHbbgeKKW0LgaeAEe0rImJDiumHq+2u0/I3gUeB3cqppmdRcy3VWuqprY32z8Ao4B3lcbRPkw2AMgB+C5hKMQWyu49AeBLYMiI272Z95/P3JPCvtSE/MzfKzGkUr8f25ehXux16OIauXu9aT1GEyu629R3gJmBkZr4e+A9Wfy2vpRiFfR+wvJw6XOw48+uZuR/F9NY3AZ/vYv8LKM7Npp3a8OdyG88CP6UYmfw4MK0m9D4JfKrTedowM/+nm+Nf7VjLc1h77H+lmI7b7g01j5+kGHXdqmZfm2Xmnl0ckyRVhsFPkvq5iPiHiNg6M18Fni2LVwGLKaYW7lxTfRrw/yJip4jYhGKE7vpy+twNwEcj4m/K68u+TO8hblPgeeCFiHgz8E8NO7Ce29pom1KMAD5b3mDknE7rzyq/H0fxMRJTu7rZRznF8cfAN6K4YczQiHhv53o1rgBOKkfbIiI2juImK5tSXPP2CvDZiBgSEYcD+/ewrYWs/lp3NoPihjx7RMRGXRzjphQjci9GxP4U4av22H5F0Z++SjnaBxARby/bP5QiUL1I0f/o9Pwngf8BLoiIYVHcxOZ4imtF232H4jrDI/i/aZ5QhNAzI2LPcp+vj4ijejjWmcCeEXF4OTX4s6we7uYA742IHaK4udGZNe18iiKAfjUiNoviJkO7RETnKcySVCkGP0nq/w4BHoriTpeXAkeX11Atp7hW7ZfllLV3UtxI5DqKqYx/pPgj/TMA5XVwnwGmU4yYLAMWUYx+dOdzFAFhGUWIub6Bx9VtW5vgEmBD4C/A3cAt7SsiYj/gdODYzFxFMaqawKRutnUMxXVvj1Kcv9O622lmzqa4zu8y4BlgLsUNbSivnzu8XH6G4qYp/93DMVwAnF2+1p/rYl8/Lo/z9nI/t3eq8mngXyJiGcX1nTN4ranAW4Fv15RtRvHaP0MxdXMJRTjuynigjWL070aK6x1vrVl/E8WNZxZm5v/WtP1GivM+vZyK+yDFdXhdysy/AEcBk8v27Ab8smb9rRR99TfAfcDNnTZxLPA64OHyuG6guOGRJFVWrH5pgSRpoChH2Z6lmMb5x1a3R60XxUdYnJiZ7251WyRJjeWInyQNIBHx0fJGJxtTjNr8FpjX2lapPyinh36a4iY3kqSKMfhJ0sAylmIa3gKK6XFHp1M/BryI+CDFNaMLWf3aO0lSRTjVU5IkSZIqzhE/SZIkSao4g58kSZIkVdyQVjdgbWy11VbZ1tbW6mZIkiRJUkvcd999f8nMrXurt14Hv7a2NmbPnt3qZkiSJElSS0TEE/XUc6qnJEmSJFWcwU+SJEmSKs7gJ0mSJEkVt15f4ydJkiT1Jy+//DLz58/nxRdfbHVTVDHDhg1jxIgRDB06dI2eb/CTJEmSGmT+/PlsuummtLW1ERGtbo4qIjNZsmQJ8+fPZ6eddlqjbTjVU5IkSWqQF198keHDhxv61FARwfDhw9dqJNngJ0mSJDWQoU/NsLb9yuAnSZIkSRXnNX6SJElSk7RNmtnQ7c2b/OFe6zz99NOcdtpp3HvvvWywwQa0tbVxySWX8La3vY1Ro0bx0ksv
MXr0aK688kqGDh3KHXfcwdixY1e7duyiiy7i/e9/P4MHD+atb30rL7/8MkOGDGHChAmcdtppDBo0iDvuuIOLLrqII444gksvvRSAhx9+mFGjRjF48GAOOeQQJk+e/Jr2XXPNNXz+859nxIgRvPDCC+y8886cc845/M3f/E1HnVdeeYU3vOENnHDCCVxwwQUAnHXWWaxatYoLL7wQgCeeeIIxY8Zw//33s/nmm79mPy+99BJnnHEGP/zhDxk0aBB77LEHl19+OSNGjODAAw/kzDPP5IMf/GBH/UsuuYTf//73nHHGGey+++6MGjWqY93pp5/OscceS1tbG5tuuikRwRZbbMHUqVPZcccdAXo8V9dccw2zZ8/msssu69jmgQceyEUXXcTo0aM7tjt48GAA3vve9/L1r3+919e6Lwx+kiRJUkVkJocddhgTJkxg+vTpAMyZM4eFCxeyyy67MGfOHFatWsXBBx/MjBkz+MQnPgHAe97zHm6++ebXbG/DDTdkzpw5ACxatIiPf/zjPPfcc3z5y1/uqDNx4kQmTpwIQFtbG7NmzWKrrbbqsZ0f+9jHOkLQrFmzOPzww5k1axa77747AD/96U8ZNWoUM2bM4Ctf+QoRwRe/+EX22WcfPvnJT7L77rtz6qmnct5553UZ+qAIisuWLeP3v/89gwcP5uqrr+bwww/nnnvuYfz48UyfPn214Dd9+nT+/d//HaDjXHWl/fjOOecczj//fK644oq6z1VP6jlva8OpnpIkSVJFzJo1i6FDh3LSSSd1lO29996MHDmyY3nw4MHsv//+/PnPf+7TtrfZZhumTJnCZZddRmY2rM1jxozhxBNPZMqUKR1l06ZN49RTT2WHHXbg7rvvBopg9bWvfY1Pf/rT/PjHP2bZsmUdwbWz5cuXc/XVV3PxxRd3jKJNnDiRDTbYgNtvv50jjzySm2++mZUrVwIwb948FixYwLvf/e662/2ud72r23PYrHO1Ngx+61ijh/slSZKkdg8++CD77bdfj3VefPFF7rnnHg455JCOsjvvvJO999674+vxxx/v8rk777wzr776KosWLWpou/fdd18effRRAFasWMFtt93GRz7yEcaPH8+0adM66h166KFsueWWHHvssXzjG9/odntz585lhx12YLPNNlutfPTo0Tz00EMMHz6c/fffn1tuuQUoRvs+9rGPddxA5fHHH1/tfNx5552v2cctt9zCuHHjum1DX8/VmDFjOvZ38cUX1/WcvnCqpyRJkjQAtIeZxx57jCOPPJK99tqrY113Uz270owRrNpt3nzzzYwZM4aNNtqII444gvPOO2+1kbuTTz6ZFStWrHYNXlfb6+oumLXl7dM9x44dy/Tp07nqqqs66vU01XPMmDEsXLiQbbbZhvPPP7+u4+rujpy15U71lCRJklSXPffck/vuu6/Lde1hZu7cudx9993cdNNNfd7+H/7wBwYPHsw222yztk1dzQMPPNBxfd+0adP42c9+RltbG/vttx9Llixh1qxZHXUHDRrEoEE9x5hdd92VJ554gmXLlq1Wfv/997PHHnsAMG7cOG677Tbuv/9+VqxYwb777ltXW2fNmsUTTzzBnnvuyZe+9KVu69Weq+HDh/PMM8+stn7p0qVNDXqdGfwkSZKkijjooINYuXJlxw1HAO69916eeOKJjuXtttuOyZMnd9wts16LFy/mpJNO4pRTTmnoZxX+/Oc/Z8qUKZxwwgk8//zz3HXXXfzpT39i3rx5zJs3j8svv3y16Z712HjjjZkwYQKnn346q1atAmDq1KksX76cgw46CIBNNtmEAw88kOOOO47x48f3afsbbrghl1xyCVOnTmXp0qWvWd/5XL397W/nl7/8JU8//TQAs2fPZuXKlatde9lsTvWUJEmSmqSej19opIjgxhtv5LTTTmPy5MkMGzas4+Mcao0bN45zzz2349q19mv82p199tkceeSRrFixgr333rvjIwqOOeYYTj/9
9LVu5/XXX89dd93F8uXL2Wmnnfje977H7rvvzjXXXMNBBx3EBhts0FF37NixnHHGGaxcuXK18t5ccMEFfO5zn+NNb3oTgwYN4s1vfjM33njjaqF1/PjxHH744R13QG3XPi223XHHHcdnP/vZ1epst912jB8/nssvv5wvfvGLPZ6rbbfdlksvvZRDDz2UV199lU022YRp06atNnI5ZsyYjumse+21F1OnTq37WOsR/eUuM2ti9OjROXv27FY3o0/aJs1c528AkiRJWjceeeSRjimLUqN11b8i4r7MHN3bc53qKUmSJEkV51RPSZIkSQ139dVXc+mll65WdsABB3D55Zc3dD+HHXYYf/zjH1cru/DCC1f7cHYZ/CRJkiQ1wcSJE5k4cWLT93PjjTc2fR9V4FRPSZIkqYHW53toqP9a235l8JMkSZIaZNiwYSxZssTwp4bKTJYsWcKwYcPWeBtO9ZQkSZIaZMSIEcyfP5/Fixe3uimqmGHDhjFixIg1fr7BT5IkSWqQoUOHstNOO7W6GdJrONVTkiRJkirO4CdJkiRJFWfwkyRJkqSKM/hJkiRJUsUZ/CRJkiSp4gx+kiRJklRxBj9JkiRJqjiDnyRJkiRVnMFPkiRJkiqu6cEvIgZHxAMRcXO5vFNE3BMRj0XE9RHxurJ8g3J5brm+rdltkyRJkqSBYF2M+J0KPFKzfCFwcWbuBjwDHF+WHw88k5m7AheX9SRJkiRJa6mpwS8iRgAfBv6zXA7gIOCGssq1wLjy8dhymXL9+8r6kiRJkqS10OwRv0uAM4BXy+XhwLOZ+Uq5PB/Yvny8PfAkQLn+ubK+JEmSJGktNC34RcRHgEWZeV9tcRdVs451tds9MSJmR8TsxYsXN6ClkiRJklRtzRzxOwD4u4iYB0ynmOJ5CbB5RAwp64wAFpSP5wMjAcr1rweWdt5oZk7JzNGZOXrrrbduYvMlSZIkqRqaFvwy88zMHJGZbcDRwO2Z+QlgFnBkWW0C8IPy8U3lMuX62zPzNSN+kiRJkqS+acXn+H0BOD0i5lJcw3dlWX4lMLwsPx2Y1IK2SZIkSVLlDOm9ytrLzDuAO8rHfwD276LOi8BR66I9kiRJkjSQtGLET5IkSZK0Dhn8JEmSJKniDH6SJEmSVHEGP0mSJEmqOIOfJEmSJFWcwU+SJEmSKs7gJ0mSJEkVZ/BrkrZJM2mbNLPVzZAkSZIkg58kSZIkVZ3BT5IkSZIqzuAnSZIkSRVn8JMkSZKkijP4SZIkSVLFGfwkSZIkqeIMfpIkSZJUcQY/SZIkSao4g58kSZIkVZzBT5IkSZIqzuAnSZIkSRVn8JMkSZKkijP4SZIkSVLFGfwkSZIkqeIMfpIkSZJUcQY/SZIkSao4g58kSZIkVZzBT5IkSZIqzuAnSZIkSRVn8JMkSZKkijP4SZIkSVLFGfwkSZIkqeIMfpIkSZJUcQY/SZIkSao4g58kSZIkVZzBT5IkSZIqzuDXZG2TZra6CZIkSZIGOIOfJEmSJFWcwU+SJEmSKs7gJ0mSJEkVZ/CTJEmSpIoz+EmSJElSxRn8JEmSJKniDH6SJEmSVHEGP0mSJEmqOIOfJEmSJFWcwU+SJEmSKs7gJ0mSJEkVZ/CTJEmSpIoz+EmSJElSxRn8JEmSJKnimhb8ImJYRPw6Iv43Ih6KiC+X5TtFxD0R8VhEXB8RryvLNyiX55br25rVNkmSJEkaSJo54rcSOCgz3wbsDRwSEe8ELgQuzszdgGeA48v6xwPPZOauwMVlPUmSJEnSWmpa8MvCC+Xi0PIrgYOAG8rya4Fx5eOx5TLl+vdFRDSrfZIkSZI0UDT1Gr+IGBwRc4BFwK3A48CzmflKWWU+sH35eHvgSYBy/XPA8Ga2T5IkSZIGgqYGv8xclZl7AyOA/YHdu6pWfu9qdC87F0TEiRExOyJmL168uHGNlSRJkqSKWid39czMZ4E7gHcCm0fEkHLVCGBB+Xg+MBKgXP96YGkX25qS
maMzc/TWW2/d7KZLkiRJ0nqvmXf13DoiNi8fbwi8H3gEmAUcWVabAPygfHxTuUy5/vbMfM2InyRJkiSpb4b0XmWNbQdcGxGDKQLmjMy8OSIeBqZHxPnAA8CVZf0rgesiYi7FSN/RTWybJEmSJA0YTQt+mfkbYJ8uyv9Acb1f5/IXgaOa1Z5Waps0E4B5kz/c4pZIkiRJGojWyTV+kiRJkqTWMfhJkiRJUsUZ/CRJkiSp4gx+kiRJklRxBj9JkiRJqjiDnyRJkiRVnMFPkiRJkirO4CdJkiRJFWfwkyRJkqSKqyv4RcRbmt0QSZIkSVJz1Dvi9x8R8euI+HREbN7UFkmSJEmSGqqu4JeZ7wY+AYwEZkfEdyLi4Ka2TJIkSZLUEHVf45eZjwFnA18A/hb4ekQ8GhGHN6txkiRJkqS1V+81fntFxMXAI8BBwEczc/fy8cVNbJ8kSZIkaS0NqbPeZcAVwFmZuaK9MDMXRMTZTWmZJEmSJKkh6g1+hwIrMnMVQEQMAoZl5vLMvK5prZMkSZIkrbV6r/H7GbBhzfJGZZkkSZIkqZ+rN/gNy8wX2hfKxxs1p0mSJEmSpEaqN/j9NSL2bV+IiP2AFT3UlyRJkiT1E/Ve43ca8N2IWFAubwd8rDlNkiRJkiQ1Ul3BLzPvjYg3A6OAAB7NzJeb2jJJkiRJUkPUO+IH8HagrXzOPhFBZk5tSqskSZIkSQ1TV/CLiOuAXYA5wKqyOAGDnyRJkiT1c/WO+I0G9sjMbGZjJEmSJEmNV+9dPR8E3tDMhkiSJEmSmqPeEb+tgIcj4tfAyvbCzPy7prRKkiRJktQw9Qa/c5vZCEmSJElS89T7cQ4/j4gdgd0y82cRsREwuLlNkyRJkiQ1Ql3X+EXECcANwLfKou2B7zerUZIkSZKkxqn35i4nAwcAzwNk5mPANs1qlCRJkiSpceoNfisz86X2hYgYQvE5fpIkSZKkfq7e4PfziDgL2DAiDga+C/ywec2SJEmSJDVKvcFvErAY+C3wKeBHwNnNapQkSZIkqXHqvavnq8AV5ZckSZIkaT1SV/CLiD/SxTV9mblzw1skSZIkSWqoej/AfXTN42HAUcCWjW+OJEmSJKnR6rrGLzOX1Hz9OTMvAQ5qctskSZIkSQ1Q71TPfWsWB1GMAG7alBZJkiRJkhqq3qmeX615/AowD/j7hrdGkiRJktRw9d7Vc0yzGyJJkiRJao56p3qe3tP6zPxaY5ojSZIkSWq0vtzV8+3ATeXyR4FfAE82o1GSJEmSpMapN/htBeybmcsAIuJc4LuZ+Y/NapgkSZIkqTHq+jgHYAfgpZrll4C2hrdGkiRJktRw9Y74XQf8OiJuBBI4DJjatFZJkiRJkhqm3rt6/mtE/Bh4T1k0MTMfaF6zJEmSJEmNUu9UT4CNgOcz81JgfkTs1KQ2SZIkSZIaqK7gFxHnAF8AziyLhgLfblajJEmSJEmNU++I32HA3wF/BcjMBcCmzWqUJEmSJKlx6g1+L2VmUtzYhYjYuLcnRMTIiJgVEY9ExEMRcWpZvmVE3BoRj5XftyjLIyK+HhFzI+I3EbHvmh6UJEmSJOn/1Bv8ZkTEt4DNI+IE4GfAFb085xXgnzNzd+CdwMkRsQcwCbgtM3cDbiuXAT4E7FZ+nQh8s09HIkmSJEnqUr139bwoIg4GngdGAV/KzFt7ec5TwFPl42UR8QiwPTAWOLCsdi1wB8X1g2OBqeXI4t0RsXlEbFduR5IkSZK0hnoNfhExGPhJZr4f6DHs9bCNNmAf4B5g2/Ywl5lPRcQ2ZbXtgSdrnja/LDP4SZIkSdJa6HWqZ2auApZHxOvXZAcRsQnwPeC0zHy+p6pd7b6L7Z0YEbMjYvbixYvXpEmSJEmSNKDUNdUTeBH4bUTcSnlnT4DM/GxPT4qIoRSh778y87/L4oXtUzgjYjtgUVk+HxhZ8/QRwILO28zMKcAUgNGjR78mGEqSJEmSVldv8JtZftUtIgK4Engk
M79Ws+omYAIwufz+g5ryUyJiOvAO4Dmv75MkSZKktddj8IuIHTLzT5l57Rps+wDgGIqRwjll2VkUgW9GRBwP/Ak4qlz3I+BQYC6wHJi4Bvvs19om9Sk7S5IkSVJD9Dbi931gX4CI+F5mHlHvhjPzLrq+bg/gfV3UT+DkercvSZIkSapPbzd3qQ1uOzezIZIkSZKk5ugt+GU3jyVJkiRJ64nepnq+LSKepxj527B8TLmcmblZU1snSZIkSVprPQa/zBy8rhoiSZIkSWqOXj/AXZIkSZK0fjP4SZIkSVLFGfwkSZIkqeIMfpIkSZJUcQa/JmibNLPVTZAkSZKkDgY/SZIkSao4g58kSZIkVZzBT5IkSZIqzuAnSZIkSRVn8JMkSZKkijP4SZIkSVLFGfwkSZIkqeIMfpIkSZJUcQY/SZIkSao4g58kSZIkVZzBr59pmzSz1U2QJEmSVDEGP0mSJEmqOIOfJEmSJFWcwU+SJEmSKs7gJ0mSJEkVZ/CTJEmSpIoz+EmSJElSxRn8JEmSJKniDH6SJEmSVHEGP0mSJEmqOIOfJEmSJFWcwU+SJEmSKs7gJ0mSJEkVZ/CTJEmSpIoz+EmSJElSxRn8WqBt0kzaJs1sdTMkSZIkDRAGP0mSJEmqOINfCznqJ0mSJGldMPhJkiRJUsUZ/CRJkiSp4gx+kiRJklRxBj9JkiRJqjiDnyRJkiRVnMFPkiRJkirO4CdJkiRJFWfwkyRJkqSKM/hJkiRJUsUZ/CRJkiSp4gx+kiRJklRxBj9JkiRJqjiDnyRJkiRVXNOCX0RcFRGLIuLBmrItI+LWiHis/L5FWR4R8fWImBsRv4mIfZvVLkmSJEkaaJo54ncNcEinsknAbZm5G3BbuQzwIWC38utE4JtNbJckSZIkDShNC36Z+QtgaafiscC15eNrgXE15VOzcDeweURs16y2SZIkSdJAsq6v8ds2M58CKL9vU5ZvDzxZU29+WfYaEXFiRMyOiNmLFy9uamMlSZIkqQr6y81doouy7KpiZk7JzNGZOXrrrbducrMkSZIkaf23roPfwvYpnOX3RWX5fGBkTb0RwIJ13DZJkiRJqqR1HfxuAiaUjycAP6gpP7a8u+c7gefap4RKkiRJktbOkGYp2YMoAAAJZklEQVRtOCKmAQcCW0XEfOAcYDIwIyKOB/4EHFVW/xFwKDAXWA5MbFa7JEmSJGmgaVrwy8zx3ax6Xxd1Ezi5WW2RJEmSpIGsv9zcRZIkSZLUJAY/SZIkSao4g58kSZIkVZzBT5IkSZIqzuAnSZIkSRVn8JMkSZKkijP4SZIkSVLFGfwkSZIkqeIMfi3WNmlmq5sgSZIkqeIMfpIkSZJUcQY/SZIkSao4g58kSZIkVZzBT5IkSZIqzuAnSZIkSRVn8JMkSZKkijP4SZIkSVLFGfwkSZIkqeIMfpIkSZJUcQa/fqRt0sxWN0GSJElSBQ1pdQNk4JMkSZLUXI74SZIkSVLFGfwkSZIkqeIMfpIkSZJUcQY/SZIkSao4g58kSZIkVZzBT5IkSZIqzuAnSZIkSRVn8OvH/Hw/SZIkSY3gB7j3QwY+SZIkSY3kiJ8kSZIkVZzBT5IkSZIqzuBXMW2TZvY6VdSppJIkSdLAYvDr5zoHuXqCnSRJkiTVMvhJkiRJUsUZ/NYTjvJJkiRJWlMGP0mSJEmqOIOfJEmSJFWcwU+SJEmSKs7gtx7pfJ2f1/1JkiRJqofBT5IkSZIqzuAnSZIkSRVn8FvP+YHukiRJknpj8JMkSZKkijP4rae6GuVz5E+SJElSVwx+kiRJklRxBr+KcvRPkiRJUrshrW6AGqOnoNc2aSbzJn+41+d0VWd91n58VTsuSZIkqa8MfgOEH/6uenX3jwJJkiStvwx+FdbXcFdbv94//A0J1eDoqCRJUrX1q2v8IuKQiPhdRMyNiEmtbo+6v3tooz4/cE3CqaOVa2Ztzl1Xr3k92xrIr9VAPvaueD5UD/uJ1mdV7L99
OaYqHn/V9JvgFxGDgcuBDwF7AOMjYo/Wtkr16O4Hvbeg0dfpp614Q6niFNmejqn9cU+vXe26esPfun5t15fXaV38A2V9+OfKmvSP/vZPoEa2pZ7z0axjX5Ntr239Vh5vq7TqmFrxftyb3n6n1Pv7aG32XzVrc0zN+H3c0+u3Jvut4mu2rvSb4AfsD8zNzD9k5kvAdGBsi9s0YHV+I653lK+7kaF6f0jX9E28tz8kmvkmUe/xNvKP875o1X7r2Vcj//hpxIhmV+V9fe7a9OGufu7WZDv1rm9WWFmbn79m98n+8A+kRtXt7bmt+sfVug7qnX9HrYtQtS6D/vpgbcJbM9vU1ePe6tZTv6d66/r3al/+Ebum2+5r3Ub8HFbh56I/isxsdRsAiIgjgUMy8x/L5WOAd2TmKd09Z/To0Tl79ux11cS62Vm7Nm/yh/t8btqvOWvEOa29fq1t0sy12nZfnltbt/Yc9HQ+ulvX0zV4nbfbyHNXrzV5jRuxn672293x1/sadN5Ob9uqd3v16KqNzXw9u9p2s46tL23oan27en52evs5qud4u/uZ6+lcdPWa9fT+05d+2FMba4+7uzbW2496a1M962v19nqti/er3t4neuovXb2G3W23Lz8rfT3P9b43dHX++/I+WW8buqu7rn6/1tOmNbWmv+e7W9fbtrq7+3o9/a63876mfX9N6jZDX/tVX+9X0Nu2u3sv708i4r7MHN1rvX4U/I4CPtgp+O2fmZ/pVO9E4MRycRTwu3Xa0PpsBfyl1Y1Qv2X/UE/sH+qNfUQ9sX+oJ/aPatoxM7furVJ/uqvnfGBkzfIIYEHnSpk5BZiyrhq1JiJidj2pWwOT/UM9sX+oN/YR9cT+oZ7YPwa2/nSN373AbhGxU0S8DjgauKnFbZIkSZKk9V6/GfHLzFci4hTgJ8Bg4KrMfKjFzZIkSZKk9V6/CX4Amfkj4EetbkcD9OupqGo5+4d6Yv9Qb+wj6on9Qz2xfwxg/ebmLpIkSZKk5uhP1/hJkiRJkprA4NdAEXFIRPwuIuZGxKRWt0etExHzIuK3ETEnImaXZVtGxK0R8Vj5fYuyPCLi62W/+U1E7Nva1qvRIuKqiFgUEQ/WlPW5P0TEhLL+YxExoRXHosbrpn+cGxF/Lt9D5kTEoTXrziz7x+8i4oM15f4OqqCIGBkRsyLikYh4KCJOLct9DxHQYx/xfUSrcapng0TEYOD3wMEUH01xLzA+Mx9uacPUEhExDxidmX+pKfs3YGlmTi7fTLfIzC+Ub8SfAQ4F3gFcmpnvaEW71RwR8V7gBWBqZr6lLOtTf4iILYHZwGgggfuA/TLzmRYckhqom/5xLvBCZl7Uqe4ewDRgf+CNwM+AN5Wr/R1UQRGxHbBdZt4fEZtS/OyPAz6J7yGixz7y9/g+ohqO+DXO/sDczPxDZr4ETAfGtrhN6l/GAteWj6+leFNuL5+ahbuBzcs3cVVEZv4CWNqpuK/94YPArZm5tPxD7VbgkOa3Xs3WTf/ozlhgemauzMw/AnMpfv/4O6iiMvOpzLy/fLwMeATYHt9DVOqhj3TH95EByuDXONsDT9Ysz6fnHzpVWwI/jYj7IuLEsmzbzHwKijdpYJuy3L4zMPW1P9hPBp5Tyql6V7VP48P+MaBFRBuwD3APvoeoC536CPg+ohoGv8aJLsqcRztwHZCZ+wIfAk4up3J1x76jWt31B/vJwPJNYBdgb+Ap4Ktluf1jgIqITYDvAadl5vM9Ve2izD4yAHTRR3wf0WoMfo0zHxhZszwCWNCitqjFMnNB+X0RcCPF9ImF7VM4y++Lyur2nYGpr/3BfjKAZObCzFyVma8CV1C8h4D9Y0CKiKEUf9D/V2b+d1nse4g6dNVHfB9RZwa/xrkX2C0idoqI1wFHAze1uE1qgYjYuLy4mojYGPgA8CBFf2i/i9oE4Afl45uAY8s7sb0TeK59+o4qra/94SfAByJii3K6zgfKMlVQp+t8D6N4D4Gi
fxwdERtExE7AbsCv8XdQZUVEAFcCj2Tm12pW+R4ioPs+4vuIOhvS6gZURWa+EhGnULyJDgauysyHWtwstca2wI3F+zBDgO9k5i0RcS8wIyKOB/4EHFXW/xHF3dfmAsuBieu+yWqmiJgGHAhsFRHzgXOAyfShP2Tm0og4j+IXM8C/ZGa9NwRRP9ZN/zgwIvammGY1D/gUQGY+FBEzgIeBV4CTM3NVuR1/B1XTAcAxwG8jYk5Zdha+h+j/dNdHxvs+olp+nIMkSZIkVZxTPSVJkiSp4gx+kiRJklRxBj9JkiRJqjiDnyRJkiRVnMFPkiRJkirO4CdJkiRJFWfwkyRJkqSKM/hJkiRJUsX9f0qcLu2HJ/XWAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 1080x360 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "col = \"CREDIT_DAY_OVERDUE\"\n",
    "creditdays_hist = df_bureau.sort_values(col, ascending = False).groupby(\"SK_ID_CURR\").first()[[col]]\n",
    "print(\"With {:d} customers, {:d} have 0-day overdue.\".format(len(creditdays_hist), sum(creditdays_hist[col] == 0)))\n",
    "creditdays_hist[creditdays_hist[col] != 0].plot.hist(\n",
    "    bins = int(max(creditdays_hist[col])/5),\n",
    "    title = \"Histogram of max credit days overdue\", figsize = (15, 5) )\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SK_ID_CURR</th>\n",
       "      <th>train_test</th>\n",
       "      <th>count_bureau</th>\n",
       "      <th>no_bureau</th>\n",
       "      <th>large_bureau_count</th>\n",
       "      <th>(DAYS_CREDIT, mean)</th>\n",
       "      <th>(DAYS_CREDIT, max)</th>\n",
       "      <th>(DAYS_CREDIT, min)</th>\n",
       "      <th>(DAYS_CREDIT, std)</th>\n",
       "      <th>DAYS_CREDIT</th>\n",
       "      <th>DAYS_CREDIT_mainb</th>\n",
       "      <th>max_overdue</th>\n",
       "      <th>has_overdue</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>100002</td>\n",
       "      <td>train</td>\n",
       "      <td>8.0</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>-874.00</td>\n",
       "      <td>-103.0</td>\n",
       "      <td>-1437.0</td>\n",
       "      <td>431.451040</td>\n",
       "      <td>-103.0</td>\n",
       "      <td>-103.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>100003</td>\n",
       "      <td>train</td>\n",
       "      <td>4.0</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>-1400.75</td>\n",
       "      <td>-606.0</td>\n",
       "      <td>-2586.0</td>\n",
       "      <td>909.826128</td>\n",
       "      <td>-606.0</td>\n",
       "      <td>-606.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>100004</td>\n",
       "      <td>train</td>\n",
       "      <td>2.0</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>-867.00</td>\n",
       "      <td>-408.0</td>\n",
       "      <td>-1326.0</td>\n",
       "      <td>649.124025</td>\n",
       "      <td>-408.0</td>\n",
       "      <td>-408.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   SK_ID_CURR train_test  count_bureau  no_bureau  large_bureau_count  \\\n",
       "0      100002      train           8.0      False               False   \n",
       "1      100003      train           4.0      False               False   \n",
       "2      100004      train           2.0      False               False   \n",
       "\n",
       "   (DAYS_CREDIT, mean)  (DAYS_CREDIT, max)  (DAYS_CREDIT, min)  \\\n",
       "0              -874.00              -103.0             -1437.0   \n",
       "1             -1400.75              -606.0             -2586.0   \n",
       "2              -867.00              -408.0             -1326.0   \n",
       "\n",
       "   (DAYS_CREDIT, std)  DAYS_CREDIT  DAYS_CREDIT_mainb  max_overdue has_overdue  \n",
       "0          431.451040       -103.0             -103.0          0.0       False  \n",
       "1          909.826128       -606.0             -606.0          0.0       False  \n",
       "2          649.124025       -408.0             -408.0          0.0       False  "
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "creditdays_hist[\"has_overdue\"] = creditdays_hist[col] != 0\n",
    "df_feat = merge_feature(df_feat, creditdays_hist, rename = {col : \"max_overdue\"})\n",
    "df_feat.head(3)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### CREDIT_TYPE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['Consumer credit' 'Credit card' 'Mortgage' 'Car loan' 'Microloan'\n",
      " 'Loan for working capital replenishment' 'Loan for business development'\n",
      " 'Real estate loan' 'Unknown type of loan' 'Another type of loan'\n",
      " 'Cash loan (non-earmarked)' 'Loan for the purchase of equipment'\n",
      " 'Mobile operator loan' 'Interbank credit'\n",
      " 'Loan for purchase of shares (margin lending)']\n"
     ]
    }
   ],
   "source": [
    "col = \"CREDIT_TYPE\"\n",
    "types = df_bureau[col].unique()\n",
    "print(types)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "def type_merge(x):\n",
    "    if \"loan\" in x or \"Loan\" in x: return \"Loan\"\n",
    "    else: return x\n",
    "\n",
    "df_bureau[\"credit_type_coalease\"] = df_bureau[col].apply(lambda x: type_merge(x))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"5\" halign=\"left\">SK_ID_BUREAU</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>credit_type_coalease</th>\n",
       "      <th>Consumer credit</th>\n",
       "      <th>Credit card</th>\n",
       "      <th>Interbank credit</th>\n",
       "      <th>Loan</th>\n",
       "      <th>Mortgage</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SK_ID_CURR</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>100001</th>\n",
       "      <td>7.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>100002</th>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                        SK_ID_BUREAU                                    \\\n",
       "credit_type_coalease Consumer credit Credit card Interbank credit Loan   \n",
       "SK_ID_CURR                                                               \n",
       "100001                           7.0         NaN              NaN  NaN   \n",
       "100002                           4.0         4.0              NaN  NaN   \n",
       "\n",
       "                               \n",
       "credit_type_coalease Mortgage  \n",
       "SK_ID_CURR                     \n",
       "100001                    NaN  \n",
       "100002                    NaN  "
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "credit_type_group = df_bureau.groupby([\"SK_ID_CURR\", \"credit_type_coalease\"]).count()[[\"SK_ID_BUREAU\"]].reset_index()\n",
    "credit_type = credit_type_group.pivot(index=\"SK_ID_CURR\", columns=\"credit_type_coalease\")\n",
    "credit_type.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SK_ID_CURR</th>\n",
       "      <th>train_test</th>\n",
       "      <th>count_bureau</th>\n",
       "      <th>no_bureau</th>\n",
       "      <th>large_bureau_count</th>\n",
       "      <th>(DAYS_CREDIT, mean)</th>\n",
       "      <th>(DAYS_CREDIT, max)</th>\n",
       "      <th>(DAYS_CREDIT, min)</th>\n",
       "      <th>(DAYS_CREDIT, std)</th>\n",
       "      <th>DAYS_CREDIT</th>\n",
       "      <th>DAYS_CREDIT_mainb</th>\n",
       "      <th>max_overdue</th>\n",
       "      <th>has_overdue</th>\n",
       "      <th>Consumer credit</th>\n",
       "      <th>Credit card</th>\n",
       "      <th>Interbank credit</th>\n",
       "      <th>Loan</th>\n",
       "      <th>Mortgage</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>100002</td>\n",
       "      <td>train</td>\n",
       "      <td>8.0</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>-874.00</td>\n",
       "      <td>-103.0</td>\n",
       "      <td>-1437.0</td>\n",
       "      <td>431.451040</td>\n",
       "      <td>-103.0</td>\n",
       "      <td>-103.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>False</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>100003</td>\n",
       "      <td>train</td>\n",
       "      <td>4.0</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>-1400.75</td>\n",
       "      <td>-606.0</td>\n",
       "      <td>-2586.0</td>\n",
       "      <td>909.826128</td>\n",
       "      <td>-606.0</td>\n",
       "      <td>-606.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>False</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>100004</td>\n",
       "      <td>train</td>\n",
       "      <td>2.0</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>-867.00</td>\n",
       "      <td>-408.0</td>\n",
       "      <td>-1326.0</td>\n",
       "      <td>649.124025</td>\n",
       "      <td>-408.0</td>\n",
       "      <td>-408.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>False</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   SK_ID_CURR train_test  count_bureau  no_bureau  large_bureau_count  \\\n",
       "0      100002      train           8.0      False               False   \n",
       "1      100003      train           4.0      False               False   \n",
       "2      100004      train           2.0      False               False   \n",
       "\n",
       "   (DAYS_CREDIT, mean)  (DAYS_CREDIT, max)  (DAYS_CREDIT, min)  \\\n",
       "0              -874.00              -103.0             -1437.0   \n",
       "1             -1400.75              -606.0             -2586.0   \n",
       "2              -867.00              -408.0             -1326.0   \n",
       "\n",
       "   (DAYS_CREDIT, std)  DAYS_CREDIT  DAYS_CREDIT_mainb  max_overdue  \\\n",
       "0          431.451040       -103.0             -103.0          0.0   \n",
       "1          909.826128       -606.0             -606.0          0.0   \n",
       "2          649.124025       -408.0             -408.0          0.0   \n",
       "\n",
       "  has_overdue  Consumer credit  Credit card  Interbank credit  Loan  Mortgage  \n",
       "0       False              4.0          4.0               0.0   0.0       0.0  \n",
       "1       False              2.0          2.0               0.0   0.0       0.0  \n",
       "2       False              2.0          0.0               0.0   0.0       0.0  "
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "credit_type.columns = credit_type.columns.droplevel()\n",
    "credit_type = credit_type.reset_index()\n",
    "df_feat = merge_feature(df_feat, credit_type.fillna(0.0))\n",
    "df_feat.head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_feat.to_csv(\"./features/bureau_features.csv\", index = False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
